""" The Maze Game:  Optimization for ReStat Paper.

        !!!  RUN WITH PYTHON 3.X !!!

        The simulation portion of the Maze Game project came after Python migrated to Python 3.X
        and so everthing was migrated to Python 3. You therefore need to run everything in this
        file in Python 3.x

"""

# IMPORTS  ===================================================================================================

import sys
import os
import csv
import pickle
import json
import math
import time
import copy
import traceback
import subprocess
import random
import datetime
import numpy as np
from   copy        import deepcopy
from   collections import defaultdict
from   operator    import itemgetter
from   sys         import argv
from   math        import ceil
from   math        import exp
from   datetime    import datetime
from   datetime    import timedelta
from   random      import shuffle as random_shuffle
from   random      import randint as random_randint
from   numpy       import cumsum as np_cumsum
import matplotlib
matplotlib.use('agg')
import matplotlib.style
matplotlib.style.use('seaborn-white')
matplotlib.rc('text', usetex=False)
matplotlib.rc('font', family='CMU Serif')
matplotlib.rc('font', serif='CMU Serif')
matplotlib.rc('font', size=13)
import matplotlib.pyplot as plt

# SETTINGS  ===================================================================================================

DEBUG                = False
PATH_OUTPUT          = 'results/'
GAME_LENGTH          = 500
START_POS            = (3, 3)      # Game board is (row,col)
BELOW_START_POS      = (2, 3)      # Game board is (row,col)
ABOVE_TOP_DOOR       = (6, 3)      # Game board is (row,col)
FORESIGHT_PRIZE_POS  = (0, 3)      # Used in Qmodel.update_indirect_model()
CONFIGS_LOSSES       = [3,  12]    # Losses
CONFIGS_GAINS        = [1,  10]    # Gains

# SIMULATION PARAMS
N_SIMS               = 1000
ALPHA                = 0.50
ALPHA_DECAY          = 0.0
BETA                 = 0.04605613825093967
BETA_DECAY           = 0.5504428051031898
BOOT                 = 20
BOOT_DECAY           = 0.5
EPSILON              = 0.0
EPSILON_DECAY        = 0.0
GAMMA                = 1.0
GAMMA_DECAY          = 0.0
TAU                  = 0.10156609631821896
TAU_DECAY            = 0.5709222754702735
COST                 = 0.05
AVERSION             = 2.643910824318072

# CONSTANTS
UP                   = 0
LEFT                 = 1
RIGHT                = 2
DOWN                 = 3
ALL_ACTIONS          = [UP, DOWN, LEFT, RIGHT]
NUM_ACTIONS          = len(ALL_ACTIONS)
DEFAULT_CONFIG       = 1
GRID_CELL_WIDTH      = 6
MATRIX_CELL_WIDTH    = 6
DISPLAY_PRIZE        = 1
DISPLAY_PRIZE_HIT    = 2
DISPLAY_DOOR         = 3
DISPLAY_DOOR_BLOCKED = 4
DISPLAY_WALL         = 5
DISPLAY_WALL_HIT     = 6
DISPLAY_PLAYER       = 7
PLOT_FONT_SIZE       = 15
PLOT_AXIS_FONT_SIZE  = 15
PLOT_LINE_WIDTH      = 1.25
PLOT_AXIS_LABEL_PAD  = 10
NO_AVERSION          = 1.0         # The loss aversion multiplier for gains when there is no loss aversion (i.e., no multiplication effect)

YLIM_EXPLORATION     = 15
YTICKS_QVALUE        = [0, 1, 2, 3, 4, 5, 6]
YTICKS_EXPLORATION   = [0, 3, 6, 9, 12, 15]
XTICKS_EXPLORATION   = [0, 100, 200, 300, 400, 500]

EXT_TXT                = '.txt'
EXT_CSV                = '.csv'
EXT_PICKLE             = '.pic'
PREFIX_LOG             = 'log-'
FN_LOGFILE             = ''

WIDTH_CONSOLE          = 80
WIDTH_PROGBAR          = 50
BASH_MAX_PROCS         = 100
INDENT                 = '    '
INDENT_DEBUG           = '        '
INDENT_ERROR           = '              '


# EXPERIMENTAL RESULTS   =============================================================================

""" Average exploration trajectories for humans are included here so that the optimizer can 
    reference them to calculate the Sum Squared Error for optimization
"""

HUMAN_RESULTS_1 =[
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06, 0.08, 0.08, 0.12, 0.16, 0.2, 0.28, 0.3, 0.3, 0.3, 0.3, 0.36, 0.56, 0.62, 0.62,
    0.7, 0.72, 0.74, 0.78, 0.82, 0.86, 0.92, 0.92, 0.92, 0.96, 0.98, 0.98, 1.0, 1.04, 1.04, 1.08, 1.16, 1.2, 1.22, 1.24,
    1.28, 1.32, 1.32, 1.32, 1.32, 1.32, 1.36, 1.4, 1.44, 1.46, 1.52, 1.58, 1.62, 1.62, 1.66, 1.68, 1.7, 1.7, 1.7, 1.7,
    1.7, 1.72, 1.74, 1.76, 1.76, 1.76, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.86, 1.86, 1.86, 1.86, 1.88, 1.88, 1.88, 1.92, 1.92,
    1.94, 1.96, 1.98, 1.98, 2.04, 2.08, 2.1, 2.14, 2.14, 2.14, 2.14, 2.16, 2.16, 2.18, 2.18, 2.18, 2.18, 2.18, 2.2, 2.24,
    2.26, 2.26, 2.26, 2.28, 2.3, 2.3, 2.3, 2.34, 2.34, 2.34, 2.4, 2.42, 2.42, 2.42, 2.42, 2.42, 2.42, 2.42, 2.42, 2.42,
    2.42, 2.42, 2.42, 2.42, 2.42, 2.44, 2.44, 2.44, 2.46, 2.5, 2.5, 2.5, 2.5, 2.52, 2.52, 2.52, 2.52, 2.52, 2.52, 2.52,
    2.56, 2.58, 2.62, 2.66, 2.66, 2.66, 2.66, 2.68, 2.68, 2.68, 2.68, 2.68, 2.68, 2.7, 2.7, 2.7, 2.72, 2.72, 2.72, 2.74,
    2.74, 2.74, 2.74, 2.74, 2.74, 2.74, 2.76, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78,
    2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.8, 2.8, 2.82, 2.84, 2.86, 2.86, 2.86, 2.86, 2.86, 2.86, 2.88, 2.88, 2.88, 2.88,
    2.88, 2.88, 2.88, 2.88, 2.88, 2.88, 2.9, 2.92, 2.92, 2.92, 2.92, 2.94, 2.94, 2.94, 2.94, 2.94, 2.94, 2.94, 2.94, 2.94,
    2.94, 2.94, 2.96, 2.96, 2.96, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98, 2.98,
    2.98, 2.98, 3.0, 3.02, 3.02, 3.02, 3.02, 3.02, 3.02, 3.02, 3.02, 3.02, 3.04, 3.04, 3.04, 3.06, 3.06, 3.06, 3.06, 3.06,
    3.06, 3.06, 3.06, 3.06, 3.06, 3.06, 3.12, 3.12, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14,
    3.14, 3.16, 3.16, 3.18, 3.18, 3.2, 3.2, 3.2, 3.2, 3.22, 3.22, 3.22, 3.24, 3.24, 3.24, 3.24, 3.24, 3.24, 3.24, 3.24,
    3.24, 3.24, 3.24, 3.24, 3.24, 3.24, 3.24, 3.24, 3.26, 3.28, 3.28, 3.28, 3.28, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3,
    3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.32, 3.32, 3.32, 3.32, 3.32, 3.32, 3.32, 3.34, 3.34,
    3.34, 3.34, 3.36, 3.36, 3.36, 3.36, 3.36, 3.36, 3.36, 3.36, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38, 3.38,
    3.38, 3.38, 3.38, 3.38, 3.38, 3.4, 3.4, 3.42, 3.42, 3.42, 3.44, 3.46, 3.48, 3.48, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
    3.5, 3.5, 3.52, 3.54, 3.56, 3.58, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6, 3.6,
    3.62, 3.62, 3.62, 3.62, 3.62, 3.62, 3.62, 3.62, 3.62, 3.62, 3.64, 3.64, 3.64, 3.66, 3.66, 3.66, 3.68, 3.68, 3.68, 3.68,
    3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.72, 3.74, 3.76, 3.78, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8,
    3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.82, 3.84, 3.84, 3.84,
    3.84, 3.86, 3.88, 3.9, 3.92, 3.94, 3.96, 3.98, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02, 4.02,
    4.02, 4.02, 4.02, 4.02, 4.04, 4.04, 4.04, 4.04]

HUMAN_RESULTS_3 = [
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12, 0.16, 0.16, 0.16, 0.16, 0.18, 0.38, 0.48, 0.54, 0.56,
    0.58, 0.6, 0.62, 0.62, 0.64, 0.66, 0.72, 0.72, 0.76, 0.76, 0.8, 0.82, 0.9, 0.92, 0.96, 1.0, 1.02, 1.02, 1.02, 1.04, 1.1,
    1.16, 1.2, 1.22, 1.24, 1.24, 1.24, 1.3, 1.3, 1.32, 1.32, 1.34, 1.36, 1.36, 1.36, 1.38, 1.38, 1.38, 1.4, 1.4, 1.4, 1.42,
    1.44, 1.46, 1.46, 1.52, 1.54, 1.54, 1.56, 1.6, 1.62, 1.62, 1.64, 1.64, 1.66, 1.66, 1.66, 1.66, 1.68, 1.7, 1.7, 1.74,
    1.76, 1.78, 1.82, 1.86, 1.96, 2.02, 2.04, 2.1, 2.14, 2.18, 2.2, 2.24, 2.26, 2.3, 2.36, 2.42, 2.52, 2.56, 2.58, 2.6, 2.62,
    2.64, 2.64, 2.64, 2.64, 2.64, 2.66, 2.66, 2.66, 2.66, 2.7, 2.7, 2.7, 2.72, 2.74, 2.76, 2.76, 2.76, 2.78, 2.8, 2.8, 2.8,
    2.84, 2.88, 2.9, 2.92, 2.92, 2.96, 2.98, 3.0, 3.02, 3.02, 3.02, 3.02, 3.06, 3.06, 3.06, 3.06, 3.12, 3.14, 3.2, 3.24, 3.24,
    3.24, 3.24, 3.24, 3.24, 3.24, 3.28, 3.28, 3.28, 3.3, 3.3, 3.32, 3.32, 3.32, 3.36, 3.42, 3.44, 3.46, 3.5, 3.52, 3.56, 3.58,
    3.58, 3.58, 3.58, 3.58, 3.58, 3.62, 3.68, 3.7, 3.74, 3.76, 3.78, 3.8, 3.84, 3.88, 3.9, 3.92, 3.94, 3.94, 3.94, 3.94, 3.96,
    3.98, 3.98, 3.98, 3.98, 3.98, 3.98, 4.0, 4.04, 4.06, 4.06, 4.06, 4.06, 4.06, 4.06, 4.06, 4.06, 4.08, 4.1, 4.1, 4.1, 4.1,
    4.1, 4.1, 4.1, 4.12, 4.14, 4.16, 4.18, 4.2, 4.22, 4.24, 4.26, 4.26, 4.26, 4.26, 4.28, 4.3, 4.32, 4.36, 4.38, 4.4, 4.4,
    4.4, 4.4, 4.4, 4.4, 4.42, 4.42, 4.44, 4.44, 4.44, 4.46, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.5, 4.5, 4.5, 4.52, 4.56,
    4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.58, 4.58, 4.58, 4.58, 4.58, 4.58, 4.6, 4.6, 4.6, 4.6, 4.6, 4.6,
    4.6, 4.6, 4.6, 4.62, 4.62, 4.62, 4.62, 4.62, 4.64, 4.64, 4.64, 4.64, 4.64, 4.68, 4.68, 4.68, 4.68, 4.68, 4.7, 4.7, 4.72,
    4.76, 4.8, 4.8, 4.8, 4.8, 4.82, 4.82, 4.82, 4.82, 4.82, 4.82, 4.82, 4.84, 4.86, 4.88, 4.88, 4.88, 4.9, 4.9, 4.9, 4.9, 4.92,
    4.92, 4.92, 4.94, 4.94, 4.94, 4.96, 4.96, 4.96, 4.96, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98,
    4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 4.98, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
    5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.02, 5.02, 5.02, 5.02,
    5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.02, 5.04, 5.06, 5.06, 5.06, 5.06, 5.08, 5.08, 5.08,
    5.08, 5.08, 5.08, 5.08, 5.08, 5.08, 5.1, 5.1, 5.1, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12,
    5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.12, 5.14, 5.16, 5.16, 5.16, 5.16, 5.16,
    5.16, 5.16, 5.18, 5.2, 5.2, 5.2, 5.2, 5.2, 5.2, 5.2, 5.2, 5.2, 5.22, 5.22, 5.22, 5.22, 5.22, 5.22, 5.22, 5.22, 5.22, 5.24,
    5.26, 5.28, 5.3, 5.32, 5.34, 5.36, 5.36, 5.36, 5.36, 5.36, 5.36, 5.36, 5.36, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38,
    5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38, 5.38]

HUMAN_RESULTS_10 = [
    0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.1, 0.13, 0.15, 0.17, 0.18, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.27, 0.62, 0.79, 0.82,
    0.86, 0.92, 0.96, 0.99, 1.03, 1.05, 1.08, 1.11, 1.12, 1.12, 1.12, 1.12, 1.18, 1.28, 1.41, 1.47, 1.57, 1.6, 1.67,
    1.7, 1.71, 1.72, 1.73, 1.74, 1.75, 1.77, 1.78, 1.79, 1.82, 1.88, 1.94, 1.96, 1.99, 2.01, 2.04, 2.08, 2.11, 2.14,
    2.17, 2.18, 2.19, 2.2, 2.21, 2.24, 2.25, 2.26, 2.29, 2.32, 2.37, 2.38, 2.39, 2.42, 2.44, 2.45, 2.45, 2.46, 2.47,
    2.48, 2.5, 2.55, 2.56, 2.58, 2.62, 2.65, 2.66, 2.69, 2.72, 2.75, 2.78, 2.79, 2.79, 2.82, 2.83, 2.85, 2.85, 2.87,
    2.9, 2.9, 2.93, 2.93, 2.93, 2.94, 2.96, 2.97, 2.99, 2.99, 2.99, 2.99, 3.01, 3.02, 3.05, 3.06, 3.08, 3.1, 3.13, 3.14,
    3.16, 3.19, 3.21, 3.23, 3.25, 3.25, 3.25, 3.25, 3.25, 3.28, 3.31, 3.31, 3.33, 3.34, 3.35, 3.36, 3.38, 3.39, 3.42,
    3.45, 3.46, 3.46, 3.47, 3.49, 3.51, 3.52, 3.54, 3.57, 3.58, 3.58, 3.59, 3.62, 3.63, 3.64, 3.68, 3.72, 3.73, 3.77,
    3.79, 3.8, 3.82, 3.88, 3.91, 3.91, 3.91, 3.91, 3.94, 3.95, 3.95, 3.95, 3.97, 4.02, 4.05, 4.06, 4.07, 4.08, 4.08,
    4.08, 4.11, 4.12, 4.13, 4.15, 4.2, 4.22, 4.24, 4.25, 4.26, 4.28, 4.3, 4.31, 4.32, 4.34, 4.36, 4.37, 4.38, 4.4, 4.4,
    4.4, 4.41, 4.43, 4.44, 4.44, 4.44, 4.45, 4.46, 4.46, 4.46, 4.46, 4.49, 4.51, 4.51, 4.51, 4.53, 4.54, 4.55, 4.57,
    4.6, 4.61, 4.62, 4.63, 4.65, 4.65, 4.65, 4.65, 4.67, 4.68, 4.68, 4.68, 4.68, 4.7, 4.72, 4.74, 4.74, 4.76, 4.76,
    4.76, 4.77, 4.77, 4.77, 4.77, 4.79, 4.8, 4.8, 4.81, 4.82, 4.84, 4.86, 4.86, 4.87, 4.89, 4.9, 4.93, 4.96, 4.99, 4.99,
    4.99, 5.01, 5.02, 5.04, 5.05, 5.07, 5.1, 5.1, 5.11, 5.11, 5.13, 5.15, 5.16, 5.17, 5.18, 5.21, 5.23, 5.24, 5.26,
    5.26, 5.26, 5.27, 5.29, 5.3, 5.3, 5.31, 5.32, 5.33, 5.33, 5.33, 5.34, 5.35, 5.38, 5.39, 5.4, 5.41, 5.42, 5.44, 5.44,
    5.45, 5.45, 5.46, 5.46, 5.46, 5.47, 5.48, 5.5, 5.5, 5.52, 5.54, 5.54, 5.54, 5.55, 5.55, 5.55, 5.57, 5.57, 5.58,
    5.59, 5.62, 5.62, 5.63, 5.65, 5.68, 5.69, 5.7, 5.7, 5.7, 5.73, 5.74, 5.75, 5.76, 5.76, 5.77, 5.77, 5.77, 5.77, 5.78,
    5.78, 5.78, 5.78, 5.79, 5.8, 5.81, 5.84, 5.85, 5.85, 5.86, 5.86, 5.87, 5.89, 5.89, 5.89, 5.93, 5.93, 5.93, 5.94,
    5.97, 5.97, 5.98, 6.0, 6.01, 6.01, 6.05, 6.06, 6.07, 6.08, 6.1, 6.12, 6.14, 6.15, 6.16, 6.18, 6.19, 6.2, 6.22, 6.23,
    6.24, 6.25, 6.27, 6.29, 6.3, 6.3, 6.31, 6.32, 6.33, 6.34, 6.35, 6.37, 6.38, 6.38, 6.38, 6.39, 6.4, 6.4, 6.4, 6.43,
    6.46, 6.48, 6.5, 6.51, 6.52, 6.53, 6.54, 6.55, 6.57, 6.57, 6.57, 6.57, 6.59, 6.59, 6.59, 6.59, 6.61, 6.62, 6.62,
    6.63, 6.63, 6.63, 6.63, 6.65, 6.66, 6.67, 6.67, 6.69, 6.71, 6.73, 6.74, 6.74, 6.77, 6.78, 6.78, 6.78, 6.78, 6.78,
    6.79, 6.79, 6.79, 6.79, 6.8, 6.81, 6.83, 6.85, 6.85, 6.87, 6.9, 6.91, 6.91, 6.93, 6.94, 6.94, 6.95, 6.95, 6.95,
    6.95, 6.95, 6.95, 6.96, 6.97, 6.98, 7.0, 7.02, 7.02, 7.03, 7.03, 7.03, 7.04, 7.04, 7.04, 7.04, 7.05, 7.05, 7.06,
    7.06, 7.06, 7.07, 7.09, 7.11, 7.12, 7.14, 7.14, 7.14, 7.16, 7.16, 7.16, 7.16, 7.16, 7.17, 7.19, 7.21, 7.24, 7.25,
    7.25, 7.26, 7.29, 7.3, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.32, 7.32, 7.32, 7.32, 7.32]

HUMAN_RESULTS_12 = [
    0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.07, 0.12, 0.13, 0.14, 0.14, 0.15, 0.15, 0.16, 0.16, 0.16, 0.16, 0.23, 0.53, 0.64,
    0.72, 0.82, 0.9, 0.97, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.99, 0.99, 1.0, 1.07, 1.23, 1.35, 1.44, 1.51, 1.59,
    1.65, 1.7, 1.73, 1.76, 1.78, 1.79, 1.8, 1.82, 1.82, 1.84, 1.89, 1.97, 2.03, 2.09, 2.14, 2.21, 2.24, 2.27, 2.3, 2.31,
    2.35, 2.38, 2.4, 2.43, 2.47, 2.52, 2.58, 2.61, 2.67, 2.73, 2.77, 2.86, 2.89, 2.97, 3.02, 3.07, 3.13, 3.14, 3.14,
    3.15, 3.17, 3.2, 3.24, 3.27, 3.32, 3.39, 3.43, 3.44, 3.46, 3.53, 3.57, 3.58, 3.59, 3.62, 3.64, 3.69, 3.73, 3.79,
    3.81, 3.82, 3.83, 3.86, 3.89, 3.93, 3.98, 4.02, 4.05, 4.09, 4.11, 4.14, 4.17, 4.21, 4.22, 4.23, 4.23, 4.24, 4.25,
    4.27, 4.28, 4.3, 4.33, 4.35, 4.36, 4.39, 4.4, 4.41, 4.42, 4.47, 4.5, 4.52, 4.53, 4.58, 4.61, 4.68, 4.71, 4.74, 4.81,
    4.84, 4.88, 4.92, 4.96, 5.0, 5.03, 5.06, 5.09, 5.11, 5.12, 5.15, 5.18, 5.2, 5.22, 5.24, 5.26, 5.31, 5.33, 5.34,
    5.36, 5.37, 5.41, 5.44, 5.46, 5.48, 5.49, 5.51, 5.52, 5.52, 5.55, 5.57, 5.6, 5.64, 5.7, 5.72, 5.74, 5.75, 5.75,
    5.77, 5.79, 5.8, 5.8, 5.8, 5.82, 5.83, 5.85, 5.86, 5.87, 5.89, 5.92, 5.94, 5.97, 6.01, 6.05, 6.09, 6.1, 6.14, 6.16,
    6.19, 6.21, 6.23, 6.24, 6.25, 6.25, 6.28, 6.31, 6.32, 6.35, 6.37, 6.42, 6.45, 6.45, 6.48, 6.51, 6.55, 6.59, 6.63,
    6.66, 6.68, 6.7, 6.73, 6.75, 6.76, 6.8, 6.83, 6.87, 6.9, 6.92, 6.95, 6.96, 6.97, 7.0, 7.03, 7.05, 7.05, 7.06, 7.09,
    7.12, 7.15, 7.18, 7.2, 7.23, 7.25, 7.25, 7.26, 7.29, 7.32, 7.33, 7.36, 7.38, 7.38, 7.4, 7.44, 7.47, 7.49, 7.52,
    7.52, 7.54, 7.55, 7.56, 7.57, 7.6, 7.65, 7.68, 7.7, 7.71, 7.72, 7.73, 7.73, 7.74, 7.76, 7.79, 7.82, 7.86, 7.88,
    7.88, 7.88, 7.89, 7.92, 7.95, 7.98, 8.02, 8.05, 8.06, 8.07, 8.1, 8.11, 8.12, 8.13, 8.16, 8.17, 8.17, 8.17, 8.19,
    8.19, 8.19, 8.19, 8.2, 8.22, 8.23, 8.25, 8.27, 8.28, 8.31, 8.36, 8.38, 8.41, 8.42, 8.45, 8.49, 8.51, 8.52, 8.53,
    8.55, 8.58, 8.59, 8.6, 8.63, 8.65, 8.68, 8.71, 8.72, 8.75, 8.78, 8.81, 8.83, 8.84, 8.87, 8.88, 8.9, 8.92, 8.93,
    8.94, 8.97, 9.0, 9.01, 9.01, 9.01, 9.02, 9.05, 9.06, 9.08, 9.09, 9.1, 9.11, 9.14, 9.14, 9.14, 9.15, 9.2, 9.21, 9.23,
    9.24, 9.26, 9.27, 9.29, 9.3, 9.31, 9.32, 9.33, 9.33, 9.34, 9.34, 9.34, 9.34, 9.35, 9.36, 9.38, 9.4, 9.42, 9.45,
    9.49, 9.5, 9.51, 9.51, 9.51, 9.52, 9.55, 9.56, 9.57, 9.59, 9.61, 9.61, 9.62, 9.63, 9.67, 9.68, 9.73, 9.76, 9.77,
    9.79, 9.82, 9.84, 9.86, 9.88, 9.91, 9.92, 9.94, 9.94, 9.94, 9.94, 9.95, 9.96, 9.99, 10.01, 10.01, 10.01, 10.02,
    10.03, 10.03, 10.05, 10.07, 10.1, 10.11, 10.11, 10.13, 10.16, 10.19, 10.2, 10.22, 10.23, 10.23, 10.23, 10.23, 10.23,
    10.23, 10.24, 10.24, 10.26, 10.27, 10.27, 10.28, 10.29, 10.32, 10.33, 10.35, 10.36, 10.37, 10.38, 10.41, 10.44,
    10.46, 10.48, 10.5, 10.53, 10.55, 10.57, 10.6, 10.61, 10.64, 10.65, 10.66, 10.67, 10.67, 10.67, 10.67, 10.67, 10.67,
    10.68, 10.68, 10.69, 10.69, 10.69, 10.71, 10.72, 10.73, 10.75, 10.76, 10.76, 10.77, 10.77, 10.77, 10.78, 10.78,
    10.78, 10.78, 10.78, 10.78, 10.79, 10.8, 10.8, 10.81, 10.81, 10.81, 10.81, 10.81, 10.81, 10.81, 10.82, 10.82, 10.82,
    10.82, 10.82, 10.82, 10.82, 10.82, 10.82, 10.83, 10.83, 10.83, 10.83, 10.83, 10.83, 10.83]


# FORMATTING FUNCTIONS  =================================================================================================

TXT_NORMAL             = '\033[0m'
TXT_BOLD               = '\033[1m'
TXT_FAINT              = '\033[2m'
TXT_ITALIC             = '\033[3m'
TXT_UNDERLINE          = '\033[4m'

TXT_BLACK              = '\033[90m'
TXT_RED                = '\033[91m'
TXT_GREEN              = '\033[92m'
TXT_YELLOW             = '\033[93m'
TXT_BLUE               = '\033[94m'
TXT_MAGENTA            = '\033[95m'
TXT_CYAN               = '\033[96m'
TXT_WHITE              = '\033[97m'
HI_WHITE               = '\033[1m\033[9%sm\033[4%sm' % (0, 7)
HI_BLACK               = '\033[1m\033[9%sm\033[4%sm' % (7, 0)
HI_RED                 = '\033[1m\033[9%sm\033[4%sm' % (7, 1)
HI_GREEN               = '\033[1m\033[9%sm\033[4%sm' % (7, 2)
HI_YELLOW              = '\033[1m\033[9%sm\033[4%sm' % (7, 3)
HI_BLUE                = '\033[1m\033[9%sm\033[4%sm' % (7, 4)
HI_MAGENTA             = '\033[1m\033[9%sm\033[4%sm' % (7, 5)
HI_CYAN                = '\033[1m\033[9%sm\033[4%sm' % (7, 6)

def txt_black(s):   return TXT_BLACK   + s + TXT_NORMAL
def txt_red(s):     return TXT_RED     + s + TXT_NORMAL
def txt_green(s):   return TXT_GREEN   + s + TXT_NORMAL
def txt_yellow(s):  return TXT_YELLOW  + s + TXT_NORMAL
def txt_blue(s):    return TXT_BLUE    + s + TXT_NORMAL
def txt_magenta(s): return TXT_MAGENTA + s + TXT_NORMAL
def txt_cyan(s):    return TXT_CYAN    + s + TXT_NORMAL
def txt_white(s):   return TXT_WHITE   + s + TXT_NORMAL
def hi_white(s):    return HI_WHITE    + s + TXT_NORMAL
def hi_black(s):    return HI_BLACK    + s + TXT_NORMAL
def hi_red(s):      return HI_RED      + s + TXT_NORMAL
def hi_green(s):    return HI_GREEN    + s + TXT_NORMAL
def hi_yellow(s):   return HI_YELLOW   + s + TXT_NORMAL
def hi_blue(s):     return HI_BLUE     + s + TXT_NORMAL
def hi_magenta(s):  return HI_MAGENTA  + s + TXT_NORMAL
def hi_cyan(s):     return HI_CYAN     + s + TXT_NORMAL


# SUPPORTING FUNCTIONS  =================================================================================================

class _Logger(object):

    def __init__(self, fname=''):

        # Redirect STDOUT
        self.terminal = sys.stdout

        # Set log filename
        if not fname:
            fname = PREFIX_LOG + time_stamp_now_uuid() + EXT_TXT

        # Write APPEND to Log
        self.log = open(fname, mode="a")

    def write(self, msg):
        self.terminal.write(msg)
        self.log.write(strip_format_codes(msg))
        self.log.flush()

    def flush(self):
        pass

def start_logging(caption='', fname=''):

    # Change STDOUT
    sys.stdout = _Logger(fname=fname)

    # Determine Banner Caption
    if not caption:
        caption = argv[0]
    else:
        caption = str(caption).upper()
    caption +=  str(str(datetime.now())[:-10]).rjust(WIDTH_CONSOLE-len(caption))

    # Print Banner
    print()
    print('=' * WIDTH_CONSOLE)
    print(caption)
    print('=' * WIDTH_CONSOLE)

def argmax(values):
    """ Return probabilies w/r/t the max value, scaled to the number of max values """

    # Handle special cases
    if len(values) == 1:
        return [1,]

    probs     = []
    max_value = max(values)
    flag_max  = [i == max_value for i in values]
    num_maxes = sum(flag_max)

    for i in flag_max:
        if i:
            probs.append(1 / num_maxes)
        else:
            probs.append(0)
    return probs

def softmax(values, tau):
    """ Return probabilites for each value, scaled by the magnitude of the value and the softmax temperature (the hyper-parameter tau) """

    # Special case: only one option
    if len(values) == 1:
        return [1,]

    # Special case: if Tau very small, that is the argmax
    if tau < 0.0001:
        return argmax(values)

    # Replace none values with min
    for i in range(len(values)):
        if values[i] is None:
            values[i] = 0

    # Calc softmax probabilities
    try:
        scaled = [v/tau for v in values]
        denom  = sum([exp(v) for v in scaled])
        probs  = [exp(v)/denom for v in scaled]
        return probs

    # ERR: Divide by zero:  Tau is too small, that is the argmax
    except ZeroDivisionError:
        return argmax(values)

    # ERR: Overflow: Tau too small or exp(v) too large, again use argmax
    except OverflowError:
        return argmax(values)

    except Exception as e:
        raise e

def softmax_select_idx(values, tau):
    """ Select an index into values based on making a softmax selection """
    splits = np_cumsum(softmax(values, tau))
    p = random.random()
    for i in range(len(splits)):
        if p < splits[i]:
            return i
    return len(splits)-1

def sigmoid_interval(p, k=1.2):
    """ Sigmoid transformation for a unit interval.

            Returns: a value [0,1] associated with a proportion [0,1]

            Parameters:
                p -- proportion across unit interval [0,1]
                k -- shape of the simoid transition

            Special Case Value for k:
                 0   always return 0
               0 < k < 1  always return 1

            Continuous Values for k:
                1.0  step function with a sharp setup from 0 to 1 in the middle at p = 0.5
                1.1  very steep transition in the middle at p = 0.5
                1.2  transition looks much like a default logistic transition
                1.3  transition flattens, becoming more linear as k increases
                ...
                2.0  transition is almost linear by k = 2.0

            Source:

                Function inspired by suggestions made here:
                https://dinodini.wordpress.com/2010/04/05/normalized-tunable-sigmoid-functions/
    """
    assert p >= 0, 'Custom sigmoid function has a domain of [0,1]'
    assert p <= 1, 'Custom sigmoid function has a domain of [0,1]'
    assert k >= 0, 'Shaping parameter must always be > = 0'

    k = float(k)

    if k < 0.0000000001 : return 0   # special case
    if k < 1.0          : return 1   # special case

    p = (p * 2) - 1

    if not p: return 0.5  # undefined at inflection point
    if p < 0: return 0.5 + ((-k * p) / (-k + p + 1)) * .5
    else:     return 0.5 + ((-k * p) / (-k - p + 1)) * .5

def decay(v, p, d):
    """ Generalized decay function over unit interval.

            Returns: initial value rescaled based on decay factor

            Parameters:

                v:         Starting value
                p:         Percent completed    must be in a unit interval [0,1]
                d:         Decay trajectory     must be in a unit interval [0,1]

            Example values for d:

                d = 0.00   No decay             return starting value
                d = 0.25   Slow onset           decay slowly and then accelerate
                d = 0.50   Linear decay         45 degree) decay across interval
                d = 0.75   Fast onset           decay fast and then deccelerate
                d = 1.00   Immediate decay      return

            Author:  KenYounge@gmail.com
            License: GNU General Public License with attribution
    """

    # No decay
    if d == 0.0: return v

    # Slow onset
    if d <= 0.5: return v * (1 - p ** (1.0 / (d * 2)))

    # Linear decay
    if d == 0.5: return v * (1 - p)

    # Fast onset
    if d  > 0.5: return v * (decay(1, p, 0.5) - (decay(1, 1 - p, 1 - d) - decay(1, 1 - p, 0.5)))

    # Immediate decay
    if d == 1.0: return 0

def average_lists(lists):
    """ Return a list with the column-by-column average calculated across other lists. """
    return [float(sum(col))/len(col) for col in zip(*lists)]

def percentile_lists(lists, pctile, max_len):
    """ Build a list with the column-by-column percentile calculated across other lists. """

    data    = [ list() for _ in range(max_len)]
    pctiles = list()

    # reshape lists so they align by move
    for lst in lists:
        for i in range(max_len):
            if i < len(lst):
                data[i].append(lst[i])

    for i in range(max_len):
        curlist = sorted(data[i])
        if len(curlist) > 0:
            cut = int(len(curlist) * float(pctile))
            if cut > len(curlist)-1:
                cut = len(curlist)-1
            if cut == -1: cut = 0
            assert cut >= 0
            assert cut < len(curlist)
            pctiles.append(curlist[cut])
    return pctiles

def fsplit(x, y, jump, ndigits):
  while x < y or abs(x-y) < 1/(10 ** (ndigits+1)):
    yield round(x, ndigits)
    x += jump

def action2str(action):
    if action is None:    return "None"
    if action == '':      return "None"
    if action == LEFT:    return "Left"
    if action == UP:      return "Up"
    if action == DOWN:    return "Down"
    if action == RIGHT:   return "Right"
    return "None"

def progbar(x, msg=''):
    """Display progbar where x is ratio completed (e.g.,  x=.477 becomes 48%)"""
    try:
        if x > 1.0: x = 1.0
        x = float(x) * 100
        w = min(100, WIDTH_PROGBAR - 9)
        p = min(100, int(ceil(float(x * w) / float(100))))
        sys.stdout.write('\r' + TXT_WHITE + msg + '  ' + TXT_GREEN +
             str(' ' + str("%.1f" % round(x, 1)).rjust(4, ' ') + '%').ljust(7) +
             TXT_WHITE + ' [' +
                TXT_NORMAL + '#' * p + '-' * (w - p) +
             TXT_NORMAL + ']')
        if x >= 100: sys.stdout.write('\n')
        sys.stdout.flush()
    except Exception as e: print(e)

def print_b(msg, divider='=', width_out=WIDTH_CONSOLE, stamp_time=True):
    """Print message in a BANNER"""
    if stamp_time: msg +=  str(str(datetime.now())[:-10]).rjust(width_out-len(msg))
    print()
    print(divider * width_out)
    print(msg)
    print(divider * width_out)

def print_d(divider='-'):
    """Print a HORIZ DIVIDER Across entire Console"""
    print(divider * WIDTH_CONSOLE)

def print_h(msg):
    """Print message in a sub-HEADER"""
    print()
    print(' >', str(msg).upper())
    print()

def print_c(count, msg='', col_width=20):
    """Print a COUNT and description"""
    if count is None:
        count = ''
    elif str(count).isnumeric():
        count = "{:,}".format(count).rjust(col_width)
    else:
        count = str(count).rjust(col_width)
    print(count + '  ' + str(msg).rstrip())

def print_w(msg):
    print_l(msg, fcolor=TXT_RED)

def print_e(e=None, trace=True):
    """Print an ERROR"""
    try:
        # Log Error (if given)
        if e:
            e = str(e).strip().upper()
            tracebk = ''
            if trace: tracebk = traceback.format_exc()
            print_l(e, error=True, fcolor=TXT_RED, tracebk=tracebk)

        # No Error, so log traceback
        else:
            e = ''
            tracebk = traceback.format_exc()
            print_l(e, error=True, fcolor=TXT_RED, tracebk=tracebk)

    except Exception as e: print('CRITICAL TRACKER ERROR:', e)

def print_l(msg, error=False, fcolor='', bcolor='', bold=False, tracebk=''):
    """Print a detailed LOG LINE"""
    # PREP
    try:
        if type(msg) is not str and type(msg) is not Exception:
            try:
                msg = json.dumps(msg)                    # convert obj to string
            except:
                try: msg = str(msg)
                except: msg = 'Cannot log - invalid msg'
        if str(msg).startswith('\n'): msg = msg[1:]      # remove newline prefix
        if error or fcolor==TXT_RED: msg = str(msg).replace('\n', ';  ')     # flatten to one line
    except Exception as e:
        try: msg += '; Error logging msg: ' + str(e)
        except:
            try: msg = 'Unable to log msg: ' + str(e)
            except: msg = '---LOGGING CRASHED---'

    # Print
    try:

        # Errors
        if error:
            if tracebk:
                lines = str(tracebk).strip().split('\n')
                out   = []
                try:

                    for line in lines[1:-1]:
                        line = str(line).strip()
                        if str(line).startswith('File '):
                            out.append(line)
                        else:
                            if out: out[-1] = out[-1] + ':  ' + line
                            else:   out.append(line)
                    if out:
                        print()
                        print(TXT_RED + '<ERROR> ', msg, TXT_NORMAL)
                        for line in out:
                            print(TXT_RED + line + TXT_NORMAL)
                        print()
                except Exception as e: print('TRACEBACK ERROR: ' + str(e))
            else:
                print(TXT_RED + '<ERROR> ', msg, TXT_NORMAL)

       # Non-Errors
        else:
            msg = INDENT + str(bcolor) + msg
            if bold: msg = TXT_BOLD + msg
            print(fcolor + msg + TXT_NORMAL)

    except Exception as e:
        try:
            print('CONSOLE ERROR: ' + str(e))
            print('ORIGINAL TEXT: ' + str(msg))
            traceback.print_exc()
        except: pass

def print_r(x, name='', num_rows=5):
    """Print the top num_rows ROWS"""
    if type(x) is dict:
        print(name, dict(list(x.items())[0:num_rows]))
    elif type(x) is set:
        print(name, set([item for item in list(x)[0:num_rows]]))
    else:
        print(name, x[0:num_rows])

def print_program_help(docstring, program):
    print()
    print_b(str(program).upper())
    print()
    print(docstring)
    print()
    print_d()
    print()

def list_fnames(dirpath):
    return next(os.walk(dirpath))[2]

def cache(obj, path, name):
    if not str(path).endswith('/'): path += '/'
    name += EXT_PICKLE
    with open(path + name, 'wb') as cachefile:
        pickle.dump(obj, cachefile, protocol=pickle.HIGHEST_PROTOCOL)
    print_c(len(obj), 'cached ' + name)

def decache(path, name, verbose=False):
    if not str(path).endswith('/'): path += '/'
    name += EXT_PICKLE
    with open(path + name, 'rb') as f:
        obj = pickle.load(f)
    if verbose: print_c(len(obj), 'decached ' + name)
    return obj

def load_pickled_game(fname):
    try:
        with open(fname, 'r') as f:
            g = pickle.loads(f.read())
    except UnicodeDecodeError:
        with open(fname, 'rb') as f:
            g = pickle.load(f)
    except: raise
    return g

def load_lst(fname, force2int=False, must_exist=True):
    out = list()
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'Ur', encoding='latin1') as f:  # universal newlines support; read-only
        lines = f.readlines()
        for line in lines:
            value = str(line).strip()
            if force2int: value = int(value)
            out.append(value)
    return out

def load_set(fname, force2int=False, must_exist=True):
    out = set()
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'Ur', encoding='latin1') as f:   # universal newlines support; read-only
        lines = f.readlines()
        for line in lines:
            value = str(line).strip()
            if force2int: value = int(value)
            out.add(value)
    return out

def load_dct(fname, force2int=False, must_exist=True):
    out = dict()
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'Ur', encoding='latin1') as f:   # universal newlines support; read-only
        for r in csv.reader(f, dialect='excel'):
            if force2int:
                if r[0] != "": out[r[0]] = int(r[1])
            else:
                if r[0] != "": out[r[0]] = r[1]
    return out

def load_str(fname, must_exist=True):
    out = ''
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'r', encoding='latin1') as f:   # read-only
        out = f.read()
    return out

def load_csv(fname, quote=None, must_exist=True):
    out = list()
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'Ur') as f:   # universal newlines support; read-only
        csv_reader = csv.reader(f, dialect='excel', quotechar=quote) if quote else csv.reader(f, dialect='excel')
        for r in csv_reader:
            out.append([value for value in r])
    return out

def load_tsv(fname, must_exist=True, strip_header=True, print_len=True):
    out = list()
    fname = str(fname.strip())
    if not os.path.exists(fname):
        if must_exist:
            raise IOError
        else:
            return out
    with open(fname, 'Ur', encoding='latin1') as f:  # universal newlines support; read-only
        lines = f.readlines()
        if strip_header: lines = lines[1:]
        for line in lines:
            values = [str(value).strip() for value in str(line).split('\t')]
            out.append(values)
    if print_len:
        print()
        print('  ' + fname)
        print()
        print_c(len(out), 'rows')
    return out

def load_citations(path_and_fname):
    return sorted(list({(int(citing), int(cited)) for citing, cited in load_csv(path_and_fname, must_exist=True)[1:]}))

def save_str(txt, fname):
    fname = str(fname.strip())
    delete_file(fname)
    with open(fname, 'w') as f:
            f.write(txt)
    assert os.path.exists(fname)

def save_lst(lst, fname):
    fname = str(fname.strip())
    if lst is None: lst = list()
    delete_file(fname)
    with open(fname, 'w') as f:
        for i in range(len(lst)):
            item = str(lst[i])
            if i < (len(lst) - 1): item += "\n"
            f.write(item)

def save_csv(lst, fname):
    fname = str(fname.strip())
    delete_file(fname)
    with open(fname, 'w') as f:
        writer = csv.writer(f, dialect='excel')
        writer.writerows(lst)

def save_dct(dct, fname):
    fname = str(fname.strip())
    delete_file(fname)
    with open(fname, 'w') as f:
        writer = csv.writer(f, dialect='excel')
        for key in dct.keys():
            writer.writerow( (key, dct[key]))

def append_csv(lst, fname):
    fname = str(fname.strip())
    with open(fname, 'a') as f:
        writer = csv.writer(f, dialect='excel')
        writer.writerow(lst)
        f.flush()

def append_str(txt, fname):
    fname = str(fname.strip())
    if txt:
        with open(fname, 'a') as f:
                f.write(txt)

def delete_file(fname):
    fname = str(fname.strip())
    try: os.remove(fname)
    except: pass

def delete_files(fpath):
    if not fpath.endswith('/'): fpath += '/'
    fnames = list_fnames(fpath)
    for fname in fnames:
        delete_file(fpath + fname)

def combine_csv(dirpath, prefix, has_headers=True, verbose=False, num_to_expect=None):
    data = []
    headers = []
    found = set()
    if not str(dirpath).endswith('/'): dirpath += '/'
    for fn in list_fnames(dirpath):
        if str(fn).startswith(prefix):
            found.add(int(''.join(i for i in fn if i.isdigit())))
            if verbose: print('  ...loading', fn)
            newdata = load_csv(dirpath + fn, must_exist=True)
            if has_headers and newdata:
                headers = newdata[0]
                newdata = newdata[1:]
            if newdata:
                data.extend(newdata)
    if num_to_expect:
        if num_to_expect and len(found) < num_to_expect:
            out = sorted(list(range(num_to_expect)))
            for i in found:
                out.remove(i)
            print_h('Missing '+ str(len(out)) + ' Files:')
            out_str = '['
            for i in out:
                out_str += str(i) + ','
            out_str += ']'
            print(out_str)
        else:
            print('  All', num_to_expect, 'files found')
    if headers:
        dataout = [headers, ]
        dataout.extend(data)
        data = dataout

    if verbose: print('Combined ', len(data), 'rows of data')

    return data

def debug_dct(dct, fname, n_rows=100, verbose=False):
    fname = str(fname.strip())
    if not fname.endswith('.csv'): fname += '.csv'
    if n_rows > len(dct): n_rows = len(dct)
    if os.path.exists(fname): os.remove(fname)
    if verbose:
        print('#' * WIDTH_CONSOLE)
        print('DEBUG: ' + fname)
        print('#' * WIDTH_CONSOLE)
        keys = list(dct.keys())
        for key in keys[:n_rows]:
            print((key, dct[key]))
    else:
        with open(fname, 'w') as f:
            writer = csv.writer(f, dialect='excel')
            keys = list(dct.keys())
            random_shuffle(keys)
            for key in keys[:n_rows]:
                writer.writerow( (key, dct[key]))

def debug_lst(lst, fname, n_rows=100, verbose=False):
    if n_rows > len(lst): n_rows = len(lst)
    fname = str(fname.strip())
    if not fname.endswith('.csv'): fname += '.csv'
    indices = list(range(len(lst)))
    random_shuffle(indices)
    if os.path.exists(fname): os.remove(fname)
    if verbose:
        print('#' * WIDTH_CONSOLE)
        print('DEBUG: ' + fname)
        print('#' * WIDTH_CONSOLE)
        for i in indices[:n_rows]:
            item = str(lst[i])
            print(item)
    else:
        with open(fname, 'w') as f:
            for i in indices[:n_rows]:
                item = str(lst[i])
                if i < (len(lst) - 1): item += "\n"
                f.write(item)

def debug_set(s, fname, n_rows=100, verbose=False):
    lst = list(s)
    debug_lst(lst, fname=fname, n_rows=n_rows, verbose=verbose)

def format_val(val, digits=2, min_val=None, supress_zero=False):
    if val is None: return ''
    if val == '': return ''
    if min_val is not None and val <= min_val:
        return '.'
    if digits is None: digits = 2
    mask = "%0." + str(digits) + "f"
    if supress_zero and abs(val) < (1 / (10 ** digits) ):
        return ' '
    elif val >= 0:
        return ' ' + str(mask % val)
    else:
        return str(mask % val)

def split_list(alist, num_lists):
    original_length = len(alist)
    return [alist[i * original_length // num_lists: (i + 1) * original_length // num_lists]
            for i in range(num_lists)]

def strip_format_codes(s):

    for FRMT in  [ TXT_NORMAL, TXT_BOLD, TXT_FAINT, TXT_ITALIC, TXT_UNDERLINE, TXT_BLACK, TXT_RED, TXT_GREEN,
                   TXT_YELLOW, TXT_BLUE, TXT_MAGENTA, TXT_CYAN, TXT_WHITE, HI_WHITE, HI_BLACK, HI_RED, HI_GREEN,
                   HI_YELLOW, HI_BLUE, HI_MAGENTA, HI_CYAN ]:
        try:
            s = str(s).replace(FRMT, '')
        except: pass
    return s

def is_number(s):
    try:    float(s)
    except: return False
    else:   return True

def git_version():
    return str(subprocess.check_output('git rev-parse --short HEAD', shell=True)).strip()

def parse_iso8601tz(date_string):
    """return a datetime object for a string in ISO 8601 format.

    This function parses strings in exactly this format:
    '2012-12-26T13:31:47.823-08:00'

    Sadly, datetime.strptime's %z format is unavailable on many platforms,
    so we can't use a single strptime() call.
    """

    dt = datetime.strptime(date_string[:-6],
                                    '%Y-%m-%dT%H:%M:%S.%f')

    # parse the timezone offset separately
    delta = timedelta(minutes=int(date_string[-2:]),
                               hours=int(date_string[-5:-3]))
    if date_string[-6] == '-':
        # add the delta to return to UTC time
        dt = dt + delta
    else:
        dt = dt - delta
    return dt

def time_stamp_now_uuid():
    ts = str(str(datetime.now())[:-10]).replace(' ','-').replace(':','-')
    ts = ts[:10] + '_' + ts[11:] + '_' + quasi_uniq_id()
    return ts

def quasi_uniq_id():
    return str(hex(int((time.time()*10000000)/float(random_randint(1,10000000))))[2:])

def running_in_jupyter():
    try:
        return('ipykernel' in sys.modules)
    except:
        return False

def next_sim_id(path=PATH_OUTPUT):
    sim_id = 1000
    new_id = 0
    try:
        files = list_fnames(path)
        if not files: return sim_id
        for file in files:
            if file == '__init__.py': continue
            words = file.split()
            if str(words[1]).isnumeric():
                new_id = int(words[1])
            if new_id >= sim_id:
                sim_id = new_id + 1
    except Exception as e:
        print('Error finding next sim number: ', e)
        sim_id = 9999
    return sim_id

def extract_exploration(games):

    data    = []
    config    = games[0].config
    num_moves = games[0].max_moves

    for g in games:
        assert g.config == config, 'All games in set of games must have same config'
        top_open   = False
        move_no    = 0
        trajectory = np.zeros(num_moves)

        for position in g.lst_positions:

            # increment move index (if not at start)
            if position != START_POS:
                move_no += 1

            # check if pass through top door
            if position == ABOVE_TOP_DOOR:
                top_open = True

            # check if moving down AFTER top door open
            if position == BELOW_START_POS and top_open:
                trajectory[move_no:GAME_LENGTH] = np.max(trajectory[:move_no]) + 1

        data.append(trajectory[:GAME_LENGTH])
    return data

def avg_exploration_by_step(games):
    data = extract_exploration(games)
    avg  = average_lists(data)
    return avg

def avg_sim_param(sims, track_name):
    data = []
    for sim in sims:
        data.append(sim.history[track_name])
    avg  = average_lists(data)
    return avg

def calc_error(sim_games_a, sim_games_b, humans_a, humans_b):
    """ Calculate simulation error:

        If a placebo test,

            error is simply the difference between losses and gains

        If a treatment test,

            difference between average trajectory of exploration for simulations compared to humans, in the "A" condition (for example "Losses")
          + difference between average trajectory of exploration for simulations compared to humans, in the "B" condition (for example "Gains")
          + difference in differences in trajectory of exploration (i.e., simulated A-to-B spread less human A-to-B spread)

    """
    totals = []
    sims_a = avg_exploration_by_step(sim_games_a)
    sims_b = avg_exploration_by_step(sim_games_b)

    for i in range(len(sims_a)):
        err_a  = abs(sims_a[i] - humans_a[i]) ** 2
        err_b  = abs(sims_b[i] - humans_b[i]) ** 2
        err_diff = abs((humans_a[i] - humans_b[i]) - (sims_a[i] - sims_b[i])) ** 2
        totals.append(err_a + err_b + err_diff)

    total_err = int(sum(totals))

    return total_err

def format_notes(nsims, sim_id, alpha, beta, epsilon, gamma, boot, tau, cost, aversion, alpha_decay, beta_decay, boot_decay, epsilon_decay, gamma_decay, tau_decay, err_total=0, err_12_10=0, err_3_1=0):
    notes = ('SIM BATCH  ' + str(sim_id) + '    (with ' + str(nsims) + ' sims)' +  '\n' +
             '\n' +
             '    Alpha      = ' + str(alpha) + '\n' +
             '       decay   = ' + str(alpha_decay) + '\n' +
             '\n' +
             '    Beta       = ' + str(beta) + '\n' +
             '       decay   = ' + str(beta_decay) + '\n' +
             '\n' +
             '    Boot       = ' + str(boot) + '\n' +
             '       decay   = ' + str(boot_decay) + '\n' +
             '\n' +
             '    Epsilon    = ' + str(epsilon) + '\n' +
             '       decay   = ' + str(epsilon_decay) + '\n' +
             '\n' +
             '    Gamma      = ' + str(gamma) + '\n' +
             '       decay   = ' + str(gamma_decay) + '\n' +
             '\n' +
             '    Tau        = ' + str(tau) + '\n' +
             '       decay   = ' + str(tau_decay) + '\n' +
             '\n' +
             '    Cost       = ' + str(cost) + '\n' +
             '    Aversion   = ' + str(aversion) + '\n' +
             '\n' +
           (('    Error      = ' + str(err_total) + '\n') if err_total else '') +
           (('     Err 12_10 = ' + str(err_total) + '\n') if err_12_10 else '') +
           (('     Err 3_1   = ' + str(err_total) + '\n') if err_3_1 else '') +
             '\n'
             )
    return notes

def plot_comparison(label_a, data_a, label_b, data_b, fname):
    """ Graph comparison of the average exploration for two history """
    plt.close('all')
    x = list(range(len(data_a)))
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    ax.set_ylim(0, 15)
    plt.yticks(YTICKS_EXPLORATION)
    plt.xticks(XTICKS_EXPLORATION)
    if YLIM_EXPLORATION: ax.set_ylim(0,YLIM_EXPLORATION)
    if YTICKS_EXPLORATION: ax.set_yticks(YTICKS_EXPLORATION)
    ax.plot(x, data_a, lw=PLOT_LINE_WIDTH, ls='--', label=label_a, color='black',  alpha=.85)
    ax.plot(x, data_b, lw=PLOT_LINE_WIDTH, ls='-',  label=label_b, color='black', alpha=.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Number of Exploratory Actions',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    plt.legend(frameon=1, loc=2)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')
    fname = PATH_OUTPUT + fname
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_sim_12_10_3_1(data_12, data_10, data_3, data_1, fname):
    """ Plot 4-way comparison of simulations"""

    # Prep data
    x = list(range(GAME_LENGTH))
    data_12 = data_12[:GAME_LENGTH]
    data_10 = data_10[:GAME_LENGTH]
    data_3  = data_3[:GAME_LENGTH]
    data_1 = data_1[:GAME_LENGTH]

    # Prep plots
    plt.close('all')
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex='all')
    plt.xticks(XTICKS_EXPLORATION)
    fig.set_size_inches(6.5, 10)

    # Plot 12 vs 10
    ax = ax1
    if YLIM_EXPLORATION: ax.set_ylim(0,YLIM_EXPLORATION)
    if YTICKS_EXPLORATION: ax.set_yticks(YTICKS_EXPLORATION)
    ax.tick_params(axis='x', which='major')
    ax.plot(x, data_12, lw=PLOT_LINE_WIDTH, ls='--', label='Losses', color='black',  alpha=.85)
    ax.plot(x, data_10, lw=PLOT_LINE_WIDTH, ls='-',  label='Gains', color='black', alpha=.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Exploration',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.legend(frameon=1, loc=2)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')

    # Plot 3 vs 1
    ax = ax2
    if YLIM_EXPLORATION: ax.set_ylim(0,YLIM_EXPLORATION)
    if YTICKS_EXPLORATION: ax.set_yticks(YTICKS_EXPLORATION)
    ax.tick_params(axis='x', which='major')
    ax.plot(x, data_3, lw=PLOT_LINE_WIDTH, ls='--', label='Losses', color='black',  alpha=.85)
    ax.plot(x, data_1, lw=PLOT_LINE_WIDTH, ls='-',  label='Gains', color='black', alpha=.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Exploration',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.legend(frameon=1, loc=2)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')

    # Show and Save
    fname = PATH_OUTPUT + fname
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_parameter(sims, param_name, sim_id, ylabel=None, prefix=''):
    plt.close('all')
    y = avg_sim_param(sims, param_name)
    x = [move for move in range(GAME_LENGTH)]
    if not ylabel: ylabel = str(param_name).capitalize()
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    plt.xticks(XTICKS_EXPLORATION)
    ax.plot(x, y, lw=PLOT_LINE_WIDTH, color='blue', alpha=0.9)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel(ylabel, labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - PARAM ' + str(param_name).capitalize() + '.pdf'
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_prob_down(sims_12, sims_10, sim_id, prefix=''):
    """ Plot probability of choosing to go DOWN from softmax at each step of game. Compare config 12 & 10 as comparison of interest for ReStat article. """
    plt.close('all')
    down_prob_12 = avg_sim_param(sims_12, 'down_prob')
    down_prob_10 = avg_sim_param(sims_10, 'down_prob')
    x = [move for move in range(GAME_LENGTH)]
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    plt.title('Probability of Going Down from the Starting Position')
    plt.xticks(XTICKS_EXPLORATION)
    ax.plot(x, down_prob_12, lw=PLOT_LINE_WIDTH, ls='--', label='Losses', color='black',  alpha=0.85)
    ax.plot(x, down_prob_10, lw=PLOT_LINE_WIDTH, ls='-',  label='Gains',  color='black', alpha=0.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Probability of Exploration', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    plt.legend(frameon=1, loc=1)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')
    fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - PROB Down.pdf'
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_q_values(sims_12, sims_10, sim_id, prefix=''):
    """ Plot the Q values of going UP and DOWN from starting position. Compare Config 12 & 10 as comparison of interest for ReStat article."""
    plt.close('all')
    x = [move for move in range(GAME_LENGTH)]
    q_value_up_avg_12   = avg_sim_param(sims_12, 'q_value_up')
    q_value_up_avg_10   = avg_sim_param(sims_10, 'q_value_up')
    q_value_down_avg_12 = avg_sim_param(sims_12, 'q_value_down')
    q_value_down_avg_10 = avg_sim_param(sims_10, 'q_value_down')

    # Q-values for going UP
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    plt.title('Q-Value for Going Up')
    ax.set_ylim(0, 6)
    ax.set_yticks(YTICKS_QVALUE)
    plt.xticks(XTICKS_EXPLORATION)
    ax.plot(x, q_value_up_avg_12, lw=PLOT_LINE_WIDTH, ls='--', label='Losses', color='black', alpha=0.85)
    ax.plot(x, q_value_up_avg_10, lw=PLOT_LINE_WIDTH, ls='-', label='Gains',  color='black', alpha=0.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Q Value', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    plt.legend(frameon=1, loc=7)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')
    fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - Q_Up.pdf'
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

    # Q-values for going DOWN
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    plt.title('Q-Value for Going Down')
    ax.set_ylim(0, 6)
    ax.set_yticks(YTICKS_QVALUE)
    plt.xticks(XTICKS_EXPLORATION)
    ax.plot(x, q_value_down_avg_12, lw=PLOT_LINE_WIDTH, ls='--', label='Losses', color='black', alpha=0.85)
    ax.plot(x, q_value_down_avg_10, lw=PLOT_LINE_WIDTH, ls='-',  label='Gains',  color='black', alpha=0.85)
    ax.xaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.yaxis.grid(True, linestyle='-', color='grey', alpha=.25)
    ax.set_xlabel('Move Number', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Q Value', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    plt.legend(frameon=1, loc=7)
    frame = ax.legend_.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('white')
    fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - Q_Down.pdf'
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_visits(games, fname):
    config = games[0].config
    rows  = games[0].rows
    cols  = games[0].cols
    V = np.zeros((rows,cols), dtype=np.int)
    count = 0
    x = list()
    y = list()
    for positions in [g.lst_positions for g in games]:
        count += 1
        for pos in positions:
            V[pos] += 1
            x.append(pos[1])
            y.append(pos[0])
    V = V/float(count)
    plot_heatmap(x, y, V, 'Visits - Config ' + str(config), rows, cols, fname=fname, digits=0)

def plot_q_matrix(sims, fname):
    """ Plot 2-D heatmap of q-matrix. """

    config    = sims[0].g.config
    rows      = sims[0].g.rows
    cols      = sims[0].g.cols

    QTOT  = np.zeros((rows,cols), dtype=np.float)
    COUNT = np.zeros((rows,cols), dtype=np.float)
    for sim in sims:
        for row in range(rows):
            for col in range(cols):
                if sim.MAX_Q[row,col] is not None and not math.isnan(sim.MAX_Q[row,col]):
                    QTOT[row,col]  = QTOT[row,col]  + max(sim.Q[row,col])
                    COUNT[row,col] += 1
    QAVG = QTOT/COUNT

    x = list()
    y = list()
    for row in range(rows):
        for col in range(cols):
            if QAVG[row,col] is not None and not math.isnan(QAVG[row,col]):
                q = int(float(QAVG[row,col]) * 100)
                for i in range(q):
                    y.append(row + 0.5)
                    x.append(col + 0.5)

    plot_heatmap(x, y, QAVG, 'Q Matrix - Config ' + str(config), rows, cols, fname=fname, digits=1)

def plot_heatmap(x, y, totals, title, rows, cols, fname, digits=1):
    plt.hist2d(x, y, bins=(list(range(cols+1)),list(range(rows+1))), cmap=plt.summer())
    plt.title(title)
    for row in range(rows):
        for col in range(cols):
            value = totals[row,col]
            if digits==0:
                value = int(value)
            else:
                value = round(value, digits)
            plt.text(col + 0.25, row + 0.35, str(value), fontsize=14)
    fname = PATH_OUTPUT + fname
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_cdf(data, fname):
    num_bins = 20
    counts, bin_edges = np.histogram(data, bins=num_bins, normed=True)
    cdf = np.cumsum(counts)
    fig, ax = plt.subplots()
    ax.plot(bin_edges[1:], cdf / cdf[-1])
    fname = PATH_OUTPUT + fname
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def plot_quartiles(data, fname):
    avg = average_lists(data)
    p75 = percentile_lists(data, 0.75, max_len=GAME_LENGTH)
    p50 = percentile_lists(data, 0.50, max_len=GAME_LENGTH)
    p25 = percentile_lists(data, 0.25, max_len=GAME_LENGTH)
    plt.close('all')
    x = list(range(len(avg)))
    fig, ax = plt.subplots()
    fig.set_size_inches(6.5, 5)
    ax.set_ylim(0, 15)
    plt.yticks([0, 3, 6, 9, 12, 15])
    plt.xticks([0, 100, 200, 300, 400, 500])
    ax.plot(x, avg, label='Average',         lw=PLOT_LINE_WIDTH, color='red',   alpha=0.9)
    ax.plot(x, p75, label='75th percentile', lw=PLOT_LINE_WIDTH, color='grey',  alpha=0.2)
    ax.plot(x, p25, label='25th percentile', lw=PLOT_LINE_WIDTH, color='grey',  alpha=0.2)
    ax.plot(x, p50, label='50th percentile', lw=PLOT_LINE_WIDTH, color='black', alpha=0.8)
    ax.fill_between(x, p75, p25, facecolor='grey', alpha=0.1)
    ax.set_xlabel('Move Number', labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    ax.set_ylabel('Number of Exploratory Actions',labelpad=PLOT_AXIS_LABEL_PAD, fontsize=PLOT_AXIS_FONT_SIZE)
    fname = PATH_OUTPUT + fname
    if os.path.exists(fname): os.remove(fname)
    plt.savefig(fname, format='pdf')
    plt.close()

def board(rows, cols, value):
    M = dict()
    for row in range(rows):
        for col in range(cols):
            M[(row, col)] = deepcopy(value)
    return M

# GAME CLASS  =====================================================================================================================================

class Game:
    """ Main class to implement the board game """

    def __init__(self, config, eid=None):

        if not eid: eid = random.randint(100000, 1000000000)

        # Incoming Params
        self.config = config
        self.eid = eid

        # Default config        Note that order is (row,col) going from bottom-left corner of the board
        self.max_moves = 500
        self.rows = 7
        self.cols = 7
        self.start_pos = (3, 3)
        self.wall_positions = {(1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (1, 2), (2, 2), (3, 2), (4, 2), (5, 2), (1, 4),
                               (2, 4), (3, 4), (4, 4), (5, 4), (1, 5), (2, 5), (3, 5), (4, 5), (5, 5)}
        self.door_positions = [(5, 3), (1, 3)]
        self.opened_doors = set()
        self.prize_positions = [(6, 3), (3, 0), (3, 6), (0, 3)]
        self.door_blocking = [2, 99999]
        self.allow_hit_wall = False
        self.allow_backtrack = False
        self.allow_chat = False
        self.walls_visible = True
        self.doors_visible = True
        self.block_reported = dict()
        self.door_opened = False

        assert self.config >= 1, 'Invalid Game Config no - ' + str(self.config)
        assert self.config <= 15, 'Invalid Game Config no - ' + str(self.config)

        # BOARD STATE
        self.ATTEMPTS = board(self.rows, self.cols, 0)  # count attempts into a position (successful or blocked)
        self.VISITS = board(self.rows, self.cols, 0)  # visits to given position
        self.PRIZES = board(self.rows, self.cols, 0)  # prizes at given position
        self.EARNINGS = board(self.rows, self.cols, 0)  # sum of all prize earnings at given position
        self.EXPERIENCE = board(self.rows, self.cols, [None, None, None, None])  # next position for each action

        # Custom config. Note prizes correspond to self.prize_positions, currently of top, left, right, bottom
        if self.config == 1:
            self.start_points = 150
            self.points_to_move = 0
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 250, 250, 250]

        if self.config == 2:
            self.start_points = 150
            self.points_to_move = 1
            self.prize_amounts = [[18, ], [18, ], [18, ], [18, ]]
            self.prize_freqs = [250, 250, 250, 250]

        if self.config == 3:
            self.start_points = 650
            self.points_to_move = 1
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 250, 250, 250]

        if self.config == 4:
            self.start_points = 150
            self.points_to_move = 0
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 125, 125, 500]

        if self.config == 5:
            self.start_points = 150
            self.points_to_move = 1
            self.prize_amounts = [[18, ], [18, ], [18, ], [18, ]]
            self.prize_freqs = [250, 125, 125, 500]

        if self.config == 6:
            self.start_points = 650
            self.points_to_move = 1
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 125, 125, 500]

        if self.config == 7:
            self.start_points = 150
            self.points_to_move = 0
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [142857, 125000, 339286, 392857]

        if self.config == 8:
            self.start_points = 150
            self.points_to_move = 1
            self.prize_amounts = [[18, ], [18, ], [18, ], [18, ]]
            self.prize_freqs = [142857, 125000, 339286, 392857]

        if self.config == 9:
            self.start_points = 650
            self.points_to_move = 1
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [142857, 125000, 339286, 392857]

        if self.config == 10:
            self.start_points = 150
            self.points_to_move = 0
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 0, 0, 750]

        if self.config == 11:
            self.start_points = 150
            self.points_to_move = 1
            self.prize_amounts = [[18, ], [18, ], [18, ], [18, ]]
            self.prize_freqs = [250, 0, 0, 750]

        if self.config == 12:
            self.start_points = 650
            self.points_to_move = 1
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [250, 0, 0, 750]

        if self.config == 13:
            self.start_points = 150
            self.points_to_move = 0
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [0, 0, 500, 500]

        if self.config == 14:
            self.start_points = 150
            self.points_to_move = 1
            self.prize_amounts = [[18, ], [18, ], [18, ], [18, ]]
            self.prize_freqs = [0, 0, 500, 500]

        if self.config == 15:
            self.start_points = 650
            self.points_to_move = 1
            self.prize_amounts = [[6, ], [6, ], [6, ], [6, ]]
            self.prize_freqs = [0, 0, 500, 500]

        # LISTS FOR TRACKING
        self.lst_actions = list()
        self.lst_prizes = list()
        self.lst_boards = list()
        self.lst_history = list()
        self.lst_positions = list()
        self.lst_waitimes = [0.0, ]
        self.lst_balance = [self.start_points]

        # START PLAY
        self.round_no = 0
        self.round_start = 0
        self.blocked = None
        self.backtrack = None
        self.prize_pos = None
        self.starttime = None
        self.lasttime = None
        self.stoptime = None
        self.completed = None
        self.prize_amount = None
        self.position = self.start_pos
        self.moves_left = self.max_moves

    # METHODS ----------------------------------------------------------------------

    def new_round(self):

        # DONE ?
        if self.completed: return

        # Start new round
        self.round_no += 1
        self.round_start = len(self.lst_actions)
        self.backtrack = None
        self.position = self.start_pos
        self.lst_positions.append(self.start_pos)
        self.VISITS[self.start_pos] += 1

        # Pick a Prize (can be stocastic in position and/or value)
        selectprizelist = list()
        for i in range(len(self.prize_freqs)):
            for j in range(self.prize_freqs[i]):
                selectprizelist.append(self.prize_positions[i])
        self.prize_pos = random.choice(selectprizelist)
        prize_index = self.prize_positions.index(self.prize_pos)
        self.prize_amount = random.choice(self.prize_amounts[prize_index])

        # Track history
        self.track()

    def act(self, action):
        """Attempts an action. Action may be denied if invalid, etc.
        Returns new position and reward earned at next position after action."""

        oldpos = self.position
        action = int(action)

        # Do not move if NOT a valid action
        if self.completed or action is None or action not in self.valid_actions(self.position):
            return oldpos, 0  # Do NOT history, since no change in history

        # Calc next position
        newpos = self.next_position(oldpos, action)
        self.blocked = None

        # If Wall
        if newpos in self.wall_positions:

            # If not allowed to hit walls, return
            if not self.allow_hit_wall:
                return oldpos, 0  # Do NOT history, since no change in history

            # Flag that the move into wall was blocked, and stay in place
            self.blocked = newpos
            newpos = oldpos

        # Record action and attempted move  (for statistical analysis)
        self.lst_actions.append(action)
        self.ATTEMPTS[newpos] += 1

        # CHECK FOR DOOR
        if newpos in self.door_positions:
            doorindex = self.door_positions.index(newpos)
            if self.ATTEMPTS[newpos] < self.door_blocking[doorindex]:
                self.blocked = newpos
                self.moves_left -= 1
                self.lst_balance.append(self.lst_balance[-1] - self.points_to_move)
                self.track(at_a_door=True)
                self.new_round()
                return self.start_pos, 0
            else:
                self.opened_doors.add(newpos)
                self.door_opened = True

        # MOVE
        self.position = newpos
        self.lst_positions.append(newpos)
        self.moves_left -= 1
        self.VISITS[newpos] += 1
        self.EXPERIENCE[oldpos][action] = newpos

        # pre-calculate what backtracking would entail on the next move (in case backtracking is disallowed)
        if newpos != oldpos:
            if action == LEFT:
                self.backtrack = RIGHT
            if action == UP:
                self.backtrack = DOWN
            if action == DOWN:
                self.backtrack = UP
            if action == RIGHT:
                self.backtrack = LEFT

        # ASSESS REWARD
        if self.position == self.prize_pos:
            reward = self.prize_amount
            self.lst_prizes.append(newpos)
            self.PRIZES[newpos] += 1
            self.EARNINGS[newpos] += reward
            self.lst_balance.append(self.lst_balance[-1] + self.prize_amount - self.points_to_move)
        else:
            reward = 0
            self.lst_balance.append(self.lst_balance[-1] - self.points_to_move)

        # DONE
        self.track()
        return newpos, reward  # return new pos and award earned for learning algos

    def track(self, at_a_door=False):

        # Calc Timers
        if not self.starttime:
            self.starttime = datetime.datetime.now()
            self.lasttime = datetime.datetime.now()
            self.stoptime = datetime.datetime.now()
        if not self.lasttime: self.lasttime = datetime.datetime.now()
        if not self.stoptime: self.stoptime = datetime.datetime.now()

        diff = datetime.datetime.now() - self.lasttime
        waittime = diff.total_seconds()
        self.lst_waitimes.append(waittime)

        self.lasttime = datetime.datetime.now()
        self.stoptime = datetime.datetime.now()

        # CAPTURE BOARD
        self.lst_boards.append(self.board_display_state())

        # CAPTURE COMBINED RESULTS IN HISTORY
        result = (self.eid,
                  self.config,
                  self.round_no,
                  self.moves_taken(),
                  self.lst_waitimes[-1],
                  self.last_action(),
                  self.lst_balance[-1],
                  self.position[0],
                  self.position[1],
                  self.prize_pos[0],
                  self.prize_pos[1],
                  int(bool(self.position == self.prize_pos)),
                  int(bool(self.position == self.start_pos)),
                  int(at_a_door)
                  )
        self.lst_history.append(result)

    def render(self, index=-1):
        txt = ''
        txt += "  EID:     " + str(self.eid) + "\n"
        txt += "  Config:  " + str(self.config) + "\n"
        txt += "  Bonus:   " + str(self.bonus()) + "\n"
        txt += "  Balance: " + str(self.lst_balance[index]) + "\n"
        txt += "  Explore: " + str(self.ATTEMPTS[(1, 3)]) + "\n"

        txt += self.board_display_text(index=index)
        print(txt)

    # PROPERTIES -------------------------------------------------------------------

    def board_display_state(self):
        B = np.zeros((self.rows, self.cols), dtype=np.int)
        for row in range((self.rows - 1), -1, -1):
            for col in range(self.cols):

                pos = (row, col)

                if pos in self.door_positions:
                    doorindex = self.door_positions.index(pos)
                    if self.ATTEMPTS[pos] < self.door_blocking[doorindex] and pos == self.blocked:
                        B[pos] = DISPLAY_DOOR_BLOCKED
                    elif self.ATTEMPTS[pos] < self.door_blocking[doorindex] and self.doors_visible:
                        B[pos] = DISPLAY_DOOR
                    else:
                        if pos == self.position:
                            B[pos] = DISPLAY_PLAYER
                        else:
                            B[pos] = 0
                elif pos in self.wall_positions:
                    if pos == self.blocked:
                        B[pos] = DISPLAY_WALL_HIT
                    else:
                        B[pos] = DISPLAY_WALL
                elif pos == self.prize_pos:
                    if pos == self.position:
                        B[pos] = DISPLAY_PRIZE_HIT
                    else:
                        B[pos] = DISPLAY_PRIZE
                elif pos == self.position:
                    B[pos] = DISPLAY_PLAYER
                else:
                    B[pos] = 0
        return B

    def board_display_text(self, index=-1):

        # Get Display State
        if not len(self.lst_boards):
            return "No Board state to display"
        if index >= len(self.lst_boards):
            return "Invalid Board state"
        try:
            display = self.lst_boards[index]
        except Exception as e:
            print("Tried index == " + str(index))
            print("Len of lst_boards == " + str(len(self.lst_boards)))
            raise e

        # Display Text
        txt = ""
        txt += ("-" * ((self.cols * GRID_CELL_WIDTH) + 2)) + "\n"
        for row in range((self.rows - 1), -1, -1):
            line = "|"
            for col in range(self.cols):
                pos = (row, col)

                if display[pos] == DISPLAY_PRIZE_HIT:
                    line += str.center("!!!", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_PRIZE:
                    line += str.center("P", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_DOOR_BLOCKED:
                    line += str.center("###", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_DOOR:
                    line += str.center("DDD", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_WALL:
                    line += str.center("***", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_PLAYER:
                    line += str.center("X", GRID_CELL_WIDTH, " ")

                elif display[pos] == DISPLAY_WALL_HIT:
                    line += str.center("###", GRID_CELL_WIDTH, " ")

                else:
                    line += str.center(" ", GRID_CELL_WIDTH, " ")

            txt += line + "|\n"
        txt += ("-" * ((self.cols * GRID_CELL_WIDTH) + 2)) + "\n"

        return txt

    def moves_taken(self):
        if self.lst_actions is None:
            return 0
        else:
            return int(len(self.lst_actions))

    def seconds(self):
        if self.stoptime is None or self.starttime is None: return 0
        delta = self.stoptime - self.starttime
        seconds = int(delta.total_seconds())
        return seconds

    def minutes(self):
        minutes = self.seconds() / float(60)
        minutes = round(minutes, 0)
        minutes = int(minutes)
        return minutes

    def balance(self):
        return self.lst_balance[-1]

    def bonus(self):
        b = str(self.balance())
        return "$" + b[:-2] + '.' + b[-2:]

    def next_position(self, oldpos, action):
        if action is None:
            return oldpos
        else:
            newpos = oldpos
            if action == UP and oldpos[0] < (self.rows - 1):
                newpos = (oldpos[0] + 1, oldpos[1])
            elif action == DOWN and oldpos[0] > 0:
                newpos = (oldpos[0] - 1, oldpos[1])
            elif action == LEFT and oldpos[1] > 0:
                newpos = (oldpos[0], oldpos[1] - 1)
            elif action == RIGHT and oldpos[1] < (self.cols - 1):
                newpos = (oldpos[0], oldpos[1] + 1)
            return newpos

    def last_action(self):
        if len(self.lst_actions):
            return self.lst_actions[-1]
        else:
            return -99

    # VALIDATION CHECKS --------------------------------------------------------------------

    def valid_actions(self, pos):
        actions = []
        for action in ALL_ACTIONS:
            if self.check_go_outside(pos, action):                                continue
            if self.check_go_wall(pos, action) and not self.allow_hit_wall:  continue
            if action == self.backtrack and not self.allow_backtrack:            continue
            actions.append(action)
        return actions

    def check_go_outside(self, pos, action):
        row = int(pos[0])
        col = int(pos[1])
        if action == LEFT and col == 0:              return True
        if action == UP and row == (self.rows - 1):  return True
        if action == DOWN and row == 0:              return True
        if action == RIGHT and col == (self.cols - 1):  return True
        return False

    def check_go_wall(self, pos, action):
        return self.next_position(pos, action) in self.wall_positions

    def check_go_door(self, pos, action):
        return self.next_position(pos, action) in self.door_positions

    # POST-PLAY ANALYSIS -------------------------------------------------------------------

    def data(self):
        out = ''
        for entry in self.lst_history:
            out += str(entry)[1:-1] + '\n'
        return out

    def count_prizes(self):
        return len(self.lst_prizes)

    def average_speed(self, trailing_periods=None):
        if self.lst_waitimes:
            if not trailing_periods:
                trailing_periods = len(self.lst_waitimes)
            if len(self.lst_waitimes) > trailing_periods:
                data = self.lst_waitimes[-trailing_periods:]
            else:
                data = self.lst_waitimes
            total = sum(data)
            count = len(data)
            avg = float(total) / float(count)
            return round(avg, 1)
        else:
            return 0.0

    def format_grid(self, VALUES, as_int=False, digits=None, supress_zeros=True, min_val=None):
        lines = list()
        lines.append("-" * ((self.cols * GRID_CELL_WIDTH) + 2))
        for row in range((self.rows - 1), -1, -1):
            line = "|"
            for col in range(self.cols):
                pos = (row, col)

                if pos in self.wall_positions:
                    line += "".center(GRID_CELL_WIDTH, " ")
                else:
                    val = VALUES[pos]
                    if val is None:
                        line += str('.').center(GRID_CELL_WIDTH, " ")
                    else:
                        if supress_zeros and abs(float(val)) < 0.01:
                            line += str('.').center(GRID_CELL_WIDTH, " ")
                        else:
                            if as_int:
                                line += str(int(val)).center(GRID_CELL_WIDTH, " ")
                            else:
                                if supress_zeros and abs(float(val)) < 0.01:
                                    line += str('.').center(GRID_CELL_WIDTH, " ")
                                else:
                                    line += str(format_val(float(val), min_val=min_val, digits=digits)).center(
                                        GRID_CELL_WIDTH, " ")
            lines.append(line + "| ")
        lines.append("-" * ((self.cols * GRID_CELL_WIDTH) + 2))
        return lines


# Q-LEARNING CLASS  =====================================================================================================================================

class Qmodel:
    """-------------|----------------------------------|-----|---------------------------------------------------------
        PARAMETER   | PURPOSE                          | MIN | MAX
       -------------|----------------------------------|-----|---------------------------------------------------------
        alpha       | learning rate                    |  0  | 1.0  re-learn immediately from recent experience
        beta        | foresight probability            |  0  | 1.0  learn only from foresight
        epsilon     | random exploration rate          |  0  | 1.0  random walk
        gamma       | continuation rate                |  0  | 1.0  full value of continuation (no discounting at 1.0)
        tau         | softmax temperature              |  0  | > 0  random as -> infinity; argmax at 0
        boot        | bootstraps                       |  0  | > 0  num bootstraps during indirect actual learning
        cost        | cost per move                    |  0  | > 0 cost per move
        aversion    | loss aversion                    |  1  | > 1 multiplier of losses versus gains
       -------------|----------------------------------|-----|---------------------------------------------------------
    """

    # INITIALIZE -------------------------------------------------------------------------------------------------

    def __init__(self, sim_no, config, alpha, beta, boot, epsilon, gamma, tau, cost, aversion, alpha_decay=0.0, beta_decay=0.0, boot_decay=0.0, gamma_decay=0.0, epsilon_decay=0.0, tau_decay=0.0, debug=False):

        # Capture parameters
        self.sim_no           = sim_no
        self.config           = config

        self.alpha            = alpha
        self.beta             = beta
        self.epsilon          = epsilon
        self.gamma            = gamma
        self.tau              = tau
        self.boot             = boot
        self.cost             = cost
        self.aversion         = aversion

        self.alpha_decay      = alpha_decay
        self.beta_decay       = beta_decay
        self.epsilon_decay    = epsilon_decay
        self.gamma_decay      = gamma_decay
        self.tau_decay        = tau_decay
        self.boot_decay       = boot_decay

        self.penalty          = -1 * (cost * aversion)

        # Starting values of parameters that can decay
        self.startval_alpha   = alpha
        self.startval_beta    = beta
        self.startval_epsilon = epsilon
        self.startval_gamma   = gamma
        self.startval_tau     = tau
        self.startval_boot    = boot

        # Init Game
        self.g                = Game(config, sim_no)

        # Init Q
        self.Q                = board(self.g.rows, self.g.cols, [0, 0, 0, 0])

        # Updating
        self.updates          = [ ]
        self.softmax_rand     = [ ]

        # Tracking
        self.history = defaultdict(list)
        self.history['alpha']        = [0 for _ in range(GAME_LENGTH)]
        self.history['beta']         = [0 for _ in range(GAME_LENGTH)]
        self.history['boot']         = [0 for _ in range(GAME_LENGTH)]
        self.history['epsilon']      = [0 for _ in range(GAME_LENGTH)]
        self.history['gamma']        = [0 for _ in range(GAME_LENGTH)]
        self.history['tau']          = [0 for _ in range(GAME_LENGTH)]

        self.history['down_prob']    = [0 for _ in range(GAME_LENGTH)]
        self.history['softmax']      = [0 for _ in range(GAME_LENGTH)]
        self.history['q_value_up']   = [0 for _ in range(GAME_LENGTH)]
        self.history['q_value_down'] = [0 for _ in range(GAME_LENGTH)]

        # Header
        if debug:
            print('\n'* 5)
            print('*' * 200)
            print('\n'* 5)
            print('RUN SIMULATION ' + '\n' +
                  '-----------------------------------' + '\n' +
                  '    Sim No        = ' + str(sim_no) + '\n' +
                  '    Config        = ' + str(config) + '\n' +
                  '\n' +
                  '    Alpha         = ' + str(alpha) + '\n' +
                  '    Beta          = ' + str(beta) + '\n' +
                  '    Boot          = ' + str(boot) + '\n' +
                  '    Epsilon       = ' + str(epsilon) + '\n' +
                  '    Gamma         = ' + str(gamma) + '\n' +
                  '    Tau           = ' + str(tau) + '\n' +
                  '    Cost          = ' + str(cost) + '\n' +
                  '    Aversion      = ' + str(aversion) + '\n' +
                                                                '\n' +
                  '    Decay Alpha   = ' + str(alpha_decay) + '\n' +
                  '    Decay Beta    = ' + str(beta_decay) + '\n' +
                  '    Decay Boot    = ' + str(boot_decay) + '\n' +
                  '    Decay Epsilon = ' + str(epsilon_decay) + '\n' +
                  '    Decay Gamma   = ' + str(gamma_decay) + '\n' +
                  '    Decay Tau     = ' + str(tau_decay) + '\n' +
                  '')

        # Initialize the start of a new round
        self.g.new_round()


    # SIMULATE ---------------------------------------------------------------------------------------------------

    def simulate(self, debug=False):

        while self.g.moves_left > 0:
            self.track()          # Track history and dynamics so we can plot
            self.act(debug)
            self.decay_params()   #


    # ACT --------------------------------------------------------------------------------------------------------

    def act(self, debug=False):

        # Prep
        position = self.g.position
        actions  = self.g.valid_actions(position)
        values   = [self.Q[self.g.position][action] for action in actions]
        selected = softmax_select_idx(values, tau=self.tau)
        action   = actions[selected]

        # Disallow hitting doors from wrong side in simulations (can cause circular continuation behavior)
        if position == FORESIGHT_PRIZE_POS and action == UP:
            action = self.g.last_action()

        # Track whether the selected action was actually maximal
        if position == START_POS:
            self.softmax_rand.append(int(values[selected] != max(values)))

        # Act
        nextpos, reward = self.g.act(action)

        # Update Q-Value
        self.updates = []
        self.update_direct(position, action, nextpos, reward)
        self.update_indirect()

        # New round when hit a prize
        if self.g.position == self.g.prize_pos:
            self.g.new_round()

        # Debug
        if debug:
            self.debug()


    # UPDATING ---------------------------------------------------------------------------------------------------

    def decay_params(self):
        """ Decay model parameters """

        pct_done   = self.g.moves_taken() / self.g.max_moves

        self.alpha   = decay(self.startval_alpha,   pct_done, self.alpha_decay)
        self.beta    = decay(self.startval_beta,    pct_done, self.beta_decay)
        self.boot    = decay(self.startval_boot, pct_done, self.boot_decay)
        self.epsilon = decay(self.startval_epsilon, pct_done, self.epsilon_decay)
        self.gamma   = decay(self.startval_gamma,   pct_done, self.gamma_decay)
        self.tau     = decay(self.startval_tau,     pct_done, self.tau_decay)

    def update_direct(self, position, action, nextpos, reward):
        """ Update Q-value based on observed: (state, action, nextstate, reward) """

        # Update only after a door has been opened
        # if not self.g.door_opened: return

        # Calc continuation - handle special case of hitting a closed door, which will send continuation upward from start
        if reward:
            nextval = 0
        else:
            if position == BELOW_START_POS and action == DOWN:
                nextval = max( (self.Q[START_POS][UP], 0) )
            else:
                nextval = self.get_max_q_value_from_pos(nextpos)

        # Calculate updated Q
        prior      = self.Q[position][action]
        discounted = self.gamma * nextval
        learned    = reward + self.penalty + discounted
        updated    = ((1 - self.alpha) * prior) + (self.alpha * learned)
        change     = updated - prior

        # Check validity of update
        self.validate('direct updating ', position, action, nextpos, reward, nextval, discounted, learned, prior, updated, change)

        # Update
        self.Q[position][action] = updated

        # Track
        self.track_update('DIRECT', position, action, nextpos, reward, nextval, discounted, learned, prior, updated, change)

    def update_indirect(self):
        """ Update Q-value by bootstrap reinforcement. """

        # Update only after a door has been opened
        if not self.g.door_opened: return

        # ACTUAL
        if random.random() > self.beta:

            while (len(self.updates) < self.boot):

                self.update_indirect_model(model=self.g.EXPERIENCE)

        # FORESIGHT
        else:

            foresight = copy.deepcopy(self.g.EXPERIENCE)
            foresight[BELOW_START_POS][DOWN] = FORESIGHT_PRIZE_POS

            while (len(self.updates) < self.boot):

                self.update_indirect_model(model=foresight)

    def update_indirect_model(self, model):
        """ Reinforce a randomly selected (state, action) --> next_state, based on given model of the environment.
                The model can be an actual model, or a foresight (hypothetical) model of the state action space
                The model must be a dictionary that points to a complete list of all actions that can be taken in the space (realized or not)
                It a state, action has not been realized in the model, then it is skipped
        """

        position = random.choice(list(model))
        action   = random.choice(ALL_ACTIONS)
        nextpos  = model[position][action]

        while nextpos is None:
            position = random.choice(list(model))
            action = random.choice(ALL_ACTIONS)
            nextpos = model[position][action]

        if position == BELOW_START_POS and nextpos == FORESIGHT_PRIZE_POS:
            flag = 'Foresight'
        elif position == START_POS and nextpos == BELOW_START_POS:
            flag = "Start Down"
        else:
            flag = 'A'

        # Calc reward, scaled to probability of obtaining it at nextpos
        total_earnings = sum( [value for key, value in self.g.EARNINGS.items()])

        if self.g.VISITS[nextpos] and total_earnings:
            prob_reward = self.g.EARNINGS[nextpos] / total_earnings
            reward      = prob_reward * self.g.prize_amount
            nextval     = (1 - prob_reward) * self.get_max_q_value_from_pos(nextpos)
        else:
            reward = 0
            nextval = self.get_max_q_value_from_pos(nextpos)

        # Calculate updated Q
        prior      = self.Q[position][action]
        discounted = self.gamma * nextval
        learned    = reward + self.penalty + discounted
        updated    = ((1 - self.alpha) * prior) + (self.alpha * learned)
        change     = updated - prior

        # Check validity of update
        self.validate('indirect updating ' + flag, position, action, nextpos, reward, nextval, discounted, learned, prior, updated, change)

        # Update
        self.Q[position][action] = updated

        # Track
        self.track_update(flag, position, action, nextpos, reward, nextval, discounted, learned, prior, updated, change)


    # TRACKING ---------------------------------------------------------------------------------------------------

    def track(self):
        """ Helper function to history the internal dynamics of the q-learning simulation. """

        # Stop tracking at end of game
        if self.g.moves_taken() >= GAME_LENGTH: return

        # Track probability of exploration at start pos (hypothetical because agent may not actually be there)
        startpos_qvals = [self.Q[START_POS][action] for action in [UP, DOWN] ]
        startpos_probs = softmax(startpos_qvals, tau=self.tau)
        self.history['down_prob'][self.g.moves_taken()] = startpos_probs[-1]
        if len(self.softmax_rand):
            if len(self.softmax_rand) == 1:
                for i in range(self.g.moves_taken() + 1):
                    self.history['softmax'][i] = float(sum(self.softmax_rand)) / len(self.softmax_rand)
            else:
                self.history['softmax'][self.g.moves_taken()] = float(sum(self.softmax_rand)) / len(self.softmax_rand)

        # Track Q Value of going UP/DOWN from starting position
        self.history['q_value_up'][self.g.moves_taken()] = self.Q[START_POS][UP]
        self.history['q_value_down'][self.g.moves_taken()] = self.Q[START_POS][DOWN]

        # Track parameters that decay
        self.history['alpha'][self.g.moves_taken()]  = self.alpha
        self.history['beta'][self.g.moves_taken()]  = self.beta
        self.history['boot'][self.g.moves_taken()]  = self.boot
        self.history['epsilon'][self.g.moves_taken()]  = self.epsilon
        self.history['gamma'][self.g.moves_taken()] = self.gamma
        self.history['tau'][self.g.moves_taken()]   = self.tau

    def track_update(self, update_type, pos, action, nextpos, reward, nextval, discounted, learned, prior, updated, change):
        self.updates.append(
            [ update_type,
              pos,
              action2str(action),
              nextpos,
              format_val(prior, digits=2, supress_zero=False),
              format_val(reward, digits=1, supress_zero=True),
              format_val(self.penalty, digits=2),
              format_val(nextval),
              format_val(discounted),
              format_val(learned, digits=4, supress_zero=True),
              format_val(updated, digits=2, supress_zero=True),
              format_val(change, digits=4, supress_zero=True)
              ])

    def validate(self, update_type, pos, action, nextpos, reward, nextval, discounted, learned, prior, updated, change):
        if updated > self.g.prize_amount:
            print('ERROR in ' + update_type)
            print('pos',        pos)
            print('action',     action)
            print('nextpos',    nextpos)
            print('reward',     reward)
            print('prior',      prior)
            print('nextval',    nextval)
            print('discounted', discounted)
            print('learned',    learned)
            print('updated',    updated)
            print('change',     change)
            assert updated <= self.g.prize_amount, 'Q value exceeded maximum reward'


    # GET INTERNAL DATA ------------------------------------------------------------------------------------------

    def get_max_q_value_from_pos(self, position):
        """Return the max scalar q value for next position. Do/do not allow foresight."""

        # Check all actions and return highest q, but ignore cases where value of (state, action) is None
        value = None
        for action in ALL_ACTIONS:
            if self.g.EXPERIENCE[position] is not None:
                if value is None:
                    value = self.Q[position][action]
                elif self.Q[position][action] > value:
                    value = self.Q[position][action]
        return value

    def get_min_q_value_from_pos(self, position):
        """Return the max scalar q value for next position. Do/do not allow foresight."""

        # Check all actions and return lowest q, but ignore cases where value of (state, action) is None
        value = None
        for action in ALL_ACTIONS:
            if self.g.EXPERIENCE[position] is not None:
                if value is None:
                    value = self.Q[position][action]
                elif self.Q[position][action] < value:
                    value = self.Q[position][action]
        return value

    def get_Q_matrix_max(self):
        """ Return the entire Q Matrix """
        M = board(self.g.rows, self.g.cols, 0)
        for row in range(self.g.rows):
            for col in range(self.g.cols):
                M[ (row, col) ] = self.get_max_q_value_from_pos((row, col))
        return M

    def get_Q_matrix_min(self):
        """ Return the entire Q Matrix """
        M = board(self.g.rows, self.g.cols, 0)
        for row in range(self.g.rows):
            for col in range(self.g.cols):
                M[ (row, col) ] = self.get_min_q_value_from_pos((row, col))
        return M

    def get_Q_matrix_by_action(self, action):
        """ Return the entire Q Matrix """
        M = board(self.g.rows, self.g.cols, 0)
        for row in range(self.g.rows):
            for col in range(self.g.cols):
                M[ (row, col) ] = self.Q[row, col][action]
        return M

    def get_Max_Q_any_pos_any_action(self):
        val = -9999
        M = self.get_Q_matrix_max()
        for pos in M:
            if M[pos] is not None:
                if M[pos] > val: val = M[pos]
        return val

    def get_Min_Q_any_pos_any_action(self):
        val = 9999
        M = self.get_Q_matrix_min()
        for pos in M:
            if M[pos] is not None:
               if M[pos] < val: val = M[pos]
        return val


    # DEBUG ------------------------------------------------------------------------------------------------------

    def debug(self):

        DEBUG_BOOT_COL_WIDTH = 11
        DEBUG_BOARD_WIDTH = 50
        DEBUG_WINDOW_WIDTH = DEBUG_BOARD_WIDTH * 4
        UPDATE_COL_HEADERS = ['UPDATES', '(row,col)', 'Action', 'Next', 'OLD VAL', 'Reward', 'Penalty', 'Next',
                              'Discount', 'Learned', 'NEW VAL', 'Diff']
        UPDATE_COL_COUNT = len(UPDATE_COL_HEADERS)
        UPDATE_HEADER_ROWS = [['-' * DEBUG_BOOT_COL_WIDTH for _ in range(UPDATE_COL_COUNT)],
                              [str(cname).center(DEBUG_BOOT_COL_WIDTH) for cname in UPDATE_COL_HEADERS],
                              ['-' * DEBUG_BOOT_COL_WIDTH for _ in range(UPDATE_COL_COUNT)]]

        # Header
        print('\n' * 10)
        print(str("Config " + str(self.config) + ' ' +
                  "| Prize" + str(self.g.prize_amount) +
                  "| Alpha" + format_val(self.alpha) +
                  "| Beta" + format_val(self.beta) +
                  "/ Rho" + format_val(self.beta_decay) +
                  "| Tau" + format_val(self.tau) +
                  "/ Omega" + format_val(self.tau_decay) +
                  "| Gamma" + format_val(self.gamma) +
                  "/ Kappa" + format_val(self.gamma_decay) +
                  "| Boot" + format_val(self.boot) +
                  "/ Psi" + format_val(self.boot_decay)
                  ).center(DEBUG_WINDOW_WIDTH))

        print(str(" Max Q" + format_val(self.get_Max_Q_any_pos_any_action()) +
                  "| Min Q " + format_val(self.get_Min_Q_any_pos_any_action()) +
                  "| Penalty " + format_val(self.penalty, digits=3) +
                  "| Non-optimal " + str(self.g.moves_taken() - sum(self.softmax_rand))
                  ).center(DEBUG_WINDOW_WIDTH))
        print()

        # Completed Action
        optimality_status = "  ****** SUB-OPTIMAL ******" if not self.softmax_rand[-1] else ""
        print(str("MOVE # " + str(len(self.g.lst_actions)) + ' ' + action2str(self.g.last_action()).upper() + ' ' +
              str(self.g.lst_positions[-2]) + ' -> ' + str(self.g.position) + optimality_status).center(DEBUG_WINDOW_WIDTH))


        # Bootstraps
        for row in UPDATE_HEADER_ROWS:
            line = ''
            for item in row:
                line += str(item).center(DEBUG_BOOT_COL_WIDTH)
            print(str(line).center(DEBUG_WINDOW_WIDTH))

        table = []
        if self.updates:
            table.append(self.updates[0])
            if len(self.updates) > 1:
                table.extend(sorted(self.updates[1:], key = itemgetter(1)))

        for i in range(int(self.boot) + 1):
            if i < len(table):
                line = ''
                for item in table[i]:
                    line += str(item).center(DEBUG_BOOT_COL_WIDTH)
                print(str(line).center(DEBUG_WINDOW_WIDTH))
            else:
                print()
        print(str('-' * UPDATE_COL_COUNT * DEBUG_BOOT_COL_WIDTH).center(DEBUG_WINDOW_WIDTH))


        # Q-Values by Action
        print()
        print()
        print(str('Q LEFT').center(DEBUG_BOARD_WIDTH) + str('Q UP').center(DEBUG_BOARD_WIDTH) + str('Q DOWN').center(DEBUG_BOARD_WIDTH) + str('Q RIGHT').center(DEBUG_BOARD_WIDTH))
        left  = self.g.format_grid(self.get_Q_matrix_by_action(LEFT))
        up    = self.g.format_grid(self.get_Q_matrix_by_action(UP))
        down  = self.g.format_grid(self.get_Q_matrix_by_action(DOWN))
        right = self.g.format_grid(self.get_Q_matrix_by_action(RIGHT))
        for i in range(len(up)):
            print(str(left[i]).center(DEBUG_BOARD_WIDTH) + str(up[i]).center(DEBUG_BOARD_WIDTH) + str(down[i]).center(DEBUG_BOARD_WIDTH) + str(right[i]).center(DEBUG_BOARD_WIDTH))


        # Board Values
        print()
        print()
        valid_acts    = [action2str(action) for action in self.g.valid_actions(self.g.position)]
        valid_values  = [self.Q[self.g.position][action] for action in self.g.valid_actions(self.g.position)]
        options_start = 'Start Up ' + format_val(self.Q[START_POS][UP]) + '  vs. Start Down ' + format_val(self.Q[START_POS][DOWN])
        options_now   = ''
        for opt in zip(valid_acts, valid_values):
            if opt[1] is None:
                options_now += str(opt[0]) + ' ' + 'None'
            else:
                options_now += str(opt[0]) + ' ' + format_val(opt[1], digits=2) + '  '
        print(str('GAME Options: ' + options_now).center(DEBUG_BOARD_WIDTH) + 'MAX Q'.center(DEBUG_BOARD_WIDTH) + 'VISITS'.center(DEBUG_BOARD_WIDTH) + 'PRIZES'.center(DEBUG_BOARD_WIDTH))
        position  = self.g.board_display_text().split('\n')
        visits    = self.g.format_grid(self.g.VISITS, as_int=True)
        prizes    = self.g.format_grid(self.g.PRIZES, as_int=True)
        max_q     = self.g.format_grid(self.get_Q_matrix_max(), min_val=self.penalty)

        for i in range(len(max_q)):
            print(str(position[i]).center(DEBUG_BOARD_WIDTH) + str(max_q[i]).center(DEBUG_BOARD_WIDTH) + str(visits[i]).center(DEBUG_BOARD_WIDTH) + str(prizes[i]).center(DEBUG_BOARD_WIDTH))


        # Add line for Options:  Options at cur position and options at start (i.e., up/down)
        print(str(options_start).center(DEBUG_BOARD_WIDTH) + ' '.ljust(DEBUG_BOARD_WIDTH) + ' '.ljust(DEBUG_BOARD_WIDTH) + ' '.ljust(DEBUG_BOARD_WIDTH))
        print()

        # Pause
        input()


# SIMULATION FUNCTIONS  ===========================================================================================================================================

def simulate(nsims, config, alpha, beta, boot, epsilon, gamma, tau, cost, aversion, alpha_decay=0.0, beta_decay=0.0, boot_decay=0.0, gamma_decay=0.0, epsilon_decay=0.0, tau_decay=0.0, debug=False):
    sims = list()
    for sim_no in range(nsims):
        q = Qmodel(sim_no=sim_no, config=config,
                   alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=aversion,
                   alpha_decay=alpha_decay, beta_decay=beta_decay, gamma_decay=gamma_decay, epsilon_decay=epsilon_decay, tau_decay=tau_decay, boot_decay=boot_decay,
                   debug=debug)
        q.simulate(debug=debug)
        sims.append(q)
    return sims

def simulate_12_10(nsims, sim_id, alpha, beta, boot, epsilon, gamma, tau, cost, aversion, alpha_decay=0.0, beta_decay=0.0, boot_decay=0.0, gamma_decay=0.0, epsilon_decay=0.0, tau_decay=0.0, prefix='', debug=False, threshold_to_save=999999):

    # Simulate Config 12 & 10
    sims_12 = simulate(
        nsims=nsims, config=12, debug=debug,
        alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=aversion,
        alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)

    sims_10 = simulate(
        nsims=nsims, config=10, debug=debug,
        alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=NO_AVERSION,
        alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)

    # Extract games
    games_12 = [q.g for q in sims_12]
    games_10 = [q.g for q in sims_10]
    error    = calc_error(games_12, games_10, HUMAN_RESULTS_12, HUMAN_RESULTS_10)

    if error < threshold_to_save:

        # Save notes
        notes = format_notes(
            nsims=nsims, sim_id=sim_id,
            alpha=alpha, beta=beta, epsilon=epsilon, gamma=gamma, tau=tau, boot=boot, cost=cost, aversion=aversion,
            alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)

        fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - NOTES.txt'
        save_str(notes, fname)

        # Save data
        data_12  = extract_exploration(games_12)
        csv_12   = [(col, data_12[row][col]) for row in range(len(data_12)) for col in range(len(data_12[row]))]
        fname    = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - DATA Config 12.csv'
        save_csv(csv_12, fname)

        data_10  = extract_exploration(games_10)
        csv_10   = [(col, data_10[row][col]) for row in range(len(data_10)) for col in range(len(data_10[row]))]
        fname    = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - DATA Config 10.csv'
        save_csv(csv_10, fname)

        # Plot comparison
        avg_12 = avg_exploration_by_step(games_12)
        avg_10 = avg_exploration_by_step(games_10)
        fname  = prefix + 'SIM ' + str(sim_id) + ' - ERROR ' + str(error).rjust(10, '0') + ' - Compare 12.10.pdf'
        plot_comparison(label_a='Losses', data_a=avg_12, label_b='Gains', data_b=avg_10, fname=fname)

    return sims_12, sims_10

def simulate_3_1(nsims, sim_id, alpha, beta, boot, epsilon, gamma, tau, cost, aversion, alpha_decay=0.0, beta_decay=0.0, boot_decay=0.0, gamma_decay=0.0, epsilon_decay=0.0, tau_decay=0.0, prefix='', debug=False, threshold_to_save=999999):

    # Simulate Config 3 & 1
    sims_3 = simulate(
        nsims=nsims, config=3, debug=debug,
        alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=aversion,
        alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)

    sims_1 = simulate(
        nsims=nsims, config=1, debug=debug,
        alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=NO_AVERSION,
        alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)

    # Extract games
    games_3 = [q.g for q in sims_3]
    games_1 = [q.g for q in sims_1]
    error   = calc_error(games_3, games_1, HUMAN_RESULTS_3, HUMAN_RESULTS_1)

    if error < threshold_to_save:

        # Save notes
        notes = format_notes(
            nsims=nsims, sim_id=sim_id,
            alpha=alpha, beta=beta, boot=boot, epsilon=epsilon, gamma=gamma, tau=tau, cost=cost, aversion=aversion,
            alpha_decay=alpha_decay, beta_decay=beta_decay, boot_decay=boot_decay, epsilon_decay=epsilon_decay, gamma_decay=gamma_decay, tau_decay=tau_decay)
        fname = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - NOTES.txt'
        save_str(notes, fname)

        # Save data
        data_3  = extract_exploration(games_3)
        csv_3   = [(col, data_3[row][col]) for row in range(len(data_3)) for col in range(len(data_3[row]))]
        fname   = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - DATA Config 3.csv'
        save_csv(csv_3, fname )

        data_1  = extract_exploration(games_1)
        csv_1   = [(col, data_1[row][col]) for row in range(len(data_1)) for col in range(len(data_1[row]))]
        fname   = PATH_OUTPUT + prefix + 'SIM ' + str(sim_id) + ' - DATA Config 1.csv'
        save_csv(csv_1, fname)

        # Plot comparison
        avg_3 = avg_exploration_by_step(games_3)
        avg_1 = avg_exploration_by_step(games_1)
        fname = prefix + 'SIM ' + str(sim_id) + ' - ERROR ' + str(error).rjust(10, '0') + ' - Compare 3.1.pdf'
        plot_comparison(label_a='Losses', data_a=avg_3, label_b='Gains', data_b=avg_1, fname=fname)

    return sims_3, sims_1

def sim_experimental_results():

    # Config 12 vs 10
    sim_id_12_10 = next_sim_id()
    sims_12, sims_10 = simulate_12_10(
        nsims=N_SIMS, sim_id=sim_id_12_10, debug=DEBUG,
        alpha=ALPHA, beta=BETA, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=AVERSION,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)

    plot_q_values(sims_12, sims_10, sim_id=sim_id_12_10)
    plot_prob_down(sims_12, sims_10, sim_id=sim_id_12_10)

    # Plot Param Trajectories
    plot_parameter(sims_12, 'alpha',   sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'beta',    sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'epsilon', sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'gamma',   sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'tau',     sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'boot',    sim_id=sim_id_12_10)
    plot_parameter(sims_12, 'softmax', sim_id=sim_id_12_10, ylabel='Softmax Sub-optimal')

    # Config 3 vs 1
    sim_id_3_1 = next_sim_id()
    sims_3, sims_1 = simulate_3_1(
        nsims=N_SIMS, sim_id=sim_id_3_1, debug=DEBUG,
        alpha=ALPHA, beta=BETA, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=AVERSION,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)

    plot_q_values(sims_3, sims_1, sim_id=sim_id_3_1)
    plot_prob_down(sims_3, sims_1, sim_id=sim_id_3_1)

def sim_counterfactual_results():

    # COUNTERFACTUAL: AVERSION = 1 for all -----------------------------------------------------

    prefix = 'Counterfactual A1 - '

    # Config 12 vs 10
    sim_id_12_10 = next_sim_id()
    sims_12, sims_10 = simulate_12_10(
        nsims=N_SIMS, sim_id=sim_id_12_10, debug=DEBUG, prefix=prefix,
        alpha=ALPHA, beta=BETA, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=1,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)

    plot_q_values(sims_12, sims_10, sim_id=sim_id_12_10, prefix=prefix)
    plot_prob_down(sims_12, sims_10, sim_id=sim_id_12_10, prefix=prefix)

    # Config 3 vs 1
    sim_id_3_1 = next_sim_id()
    sims_3, sims_1 = simulate_3_1(
        nsims=N_SIMS, sim_id=sim_id_3_1, debug=DEBUG, prefix=prefix,
        alpha=ALPHA, beta=BETA, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=1,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)

    plot_q_values(sims_3, sims_1, sim_id=sim_id_3_1, prefix=prefix)
    plot_prob_down(sims_3, sims_1, sim_id=sim_id_3_1, prefix=prefix)


    # COUNTERFACTUAL: Beta = 0 for all -----------------------------------------------------

    prefix = 'Counterfactual B0 - '

    # Config 12 vs 10
    sim_id_12_10 = next_sim_id()
    simulate_12_10(
        nsims=N_SIMS, sim_id=sim_id_12_10, debug=DEBUG, prefix=prefix,
        alpha=ALPHA, beta=0, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=AVERSION,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)

    # Config 3 vs 1
    sim_id_3_1 = next_sim_id()
    simulate_3_1(
        nsims=N_SIMS, sim_id=sim_id_3_1, debug=DEBUG, prefix=prefix,
        alpha=ALPHA, beta=0, boot=BOOT, epsilon=EPSILON, gamma=GAMMA, tau=TAU, cost=COST, aversion=AVERSION,
        alpha_decay=ALPHA_DECAY, beta_decay=BETA_DECAY, boot_decay=BOOT_DECAY, epsilon_decay=EPSILON_DECAY, gamma_decay=GAMMA_DECAY, tau_decay=TAU_DECAY)


# COMMANDLINE  ===============================================================================================================================================
if __name__ == "__main__":

    sim_experimental_results()

    sim_counterfactual_results()

